diff options
83 files changed, 5817 insertions, 5828 deletions
diff --git a/Lib/_strptime.py b/Lib/_strptime.py index cde6e5f..f7e04cd 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -2,7 +2,7 @@ CLASSES: LocaleTime -- Discovers and/or stores locale-specific time information - TimeRE -- Creates regexes for pattern matching string of text containing + TimeRE -- Creates regexes for pattern matching string of text containing time information as is returned by time.strftime() FUNCTIONS: @@ -10,7 +10,7 @@ FUNCTIONS: year gregorian -- Calculates the Gregorian date based on the Julian day and year - julianday -- Calculates the Julian day since the first of the year based + julianday -- Calculates the Julian day since the first of the year based on the Gregorian date dayofweek -- Calculates the day of the week from the Gregorian date. strptime -- Calculates the time struct represented by the passed-in string @@ -40,23 +40,23 @@ class LocaleTime(object): store the values have mangled names): f_weekday -- full weekday names (7-item list) a_weekday -- abbreviated weekday names (7-item list) - f_month -- full weekday names (14-item list; dummy value in [0], which + f_month -- full weekday names (14-item list; dummy value in [0], which is added by code) - a_month -- abbreviated weekday names (13-item list, dummy value in + a_month -- abbreviated weekday names (13-item list, dummy value in [0], which is added by code) am_pm -- AM/PM representation (2-item list) LC_date_time -- format string for date/time representation (string) LC_date -- format string for date representation (string) LC_time -- format string for time representation (string) - timezone -- daylight- and non-daylight-savings timezone representation - (3-item list; code tacks on blank item at end for + timezone -- daylight- and non-daylight-savings timezone representation + (3-item list; code tacks on blank item at end for possible lack of timezone such as UTC) lang -- Language used by instance (string) - + """ - def __init__(self, f_weekday=None, a_weekday=None, f_month=None, - a_month=None, am_pm=None, LC_date_time=None, LC_time=None, LC_date=None, + def __init__(self, f_weekday=None, a_weekday=None, f_month=None, + a_month=None, am_pm=None, LC_date_time=None, LC_time=None, LC_date=None, timezone=None, lang=None): """Optionally set attributes with passed-in values.""" if f_weekday is None: self.__f_weekday = None @@ -117,9 +117,9 @@ class LocaleTime(object): if not self.__a_weekday: self.__calc_weekday() return self.__a_weekday - f_weekday = property(__get_f_weekday, __set_nothing, + f_weekday = property(__get_f_weekday, __set_nothing, doc="Full weekday names") - a_weekday = property(__get_a_weekday, __set_nothing, + a_weekday = property(__get_a_weekday, __set_nothing, doc="Abbreviated weekday names") def __get_f_month(self): @@ -187,7 +187,7 @@ class LocaleTime(object): f_weekday = [calendar.day_name[i] for i in range(7)] if not self.__a_weekday: self.__a_weekday = a_weekday if not self.__f_weekday: self.__f_weekday = f_weekday - + def __calc_month(self): """Set self.__f_month and self.__a_month using the calendar module.""" a_month = [calendar.month_abbr[i] for i in range(13)] @@ -197,11 +197,11 @@ class LocaleTime(object): def __calc_am_pm(self): """Set self.__am_pm by using time.strftime(). - - The magic date (2002, 3, 17, hour, 44, 44, 2, 76, 0) is not really - that magical; just happened to have used it everywhere else where a + + The magic date (2002, 3, 17, hour, 44, 44, 2, 76, 0) is not really + that magical; just happened to have used it everywhere else where a static date was needed. - + """ am_pm = [] for hour in (01,22): @@ -211,10 +211,10 @@ class LocaleTime(object): def __calc_date_time(self): """Set self.__date_time, self.__date, & self.__time by using time.strftime(). - - Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of - overloaded numbers is minimized. The order in which searches for - values within the format string is very important; it eliminates + + Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of + overloaded numbers is minimized. The order in which searches for + values within the format string is very important; it eliminates possible ambiguity for what something represents. """ @@ -255,17 +255,17 @@ class LocaleTime(object): def __calc_timezone(self): """Set self.__timezone by using time.tzname. - - Empty string used for matching when timezone is not used/needed such + + Empty string used for matching when timezone is not used/needed such as with UTC. """ self.__timezone = self.__pad(time.tzname, 0) def __calc_lang(self): - """Set self.lang by using locale.getlocale() or + """Set self.lang by using locale.getlocale() or locale.getdefaultlocale(). - + """ current_lang = locale.getlocale(locale.LC_TIME)[0] if current_lang: self.__lang = current_lang @@ -277,7 +277,7 @@ class TimeRE(dict): def __init__(self, locale_time=LocaleTime()): """Initialize instance with non-locale regexes and store LocaleTime object.""" super(TimeRE,self).__init__({ - 'd': r"(?P<d>3[0-1]|[0-2]\d|\d| \d)", #The " \d" option is + 'd': r"(?P<d>3[0-1]|[0-2]\d|\d| \d)", #The " \d" option is #to make %c from ANSI #C work 'H': r"(?P<H>2[0-3]|[0-1]\d|\d)", @@ -299,16 +299,16 @@ class TimeRE(dict): return super(TimeRE,self).__getitem__(fetch) except KeyError: if fetch == 'A': - self[fetch] = self.__seqToRE(self.locale_time.f_weekday, + self[fetch] = self.__seqToRE(self.locale_time.f_weekday, fetch) elif fetch == 'a': - self[fetch] = self.__seqToRE(self.locale_time.a_weekday, + self[fetch] = self.__seqToRE(self.locale_time.a_weekday, fetch) elif fetch == 'B': - self[fetch] = self.__seqToRE(self.locale_time.f_month[1:], + self[fetch] = self.__seqToRE(self.locale_time.f_month[1:], fetch) elif fetch == 'b': - self[fetch] = self.__seqToRE(self.locale_time.a_month[1:], + self[fetch] = self.__seqToRE(self.locale_time.a_month[1:], fetch) elif fetch == 'c': self[fetch] = self.pattern(self.locale_time.LC_date_time) @@ -319,28 +319,28 @@ class TimeRE(dict): elif fetch == 'X': self[fetch] = self.pattern(self.locale_time.LC_time) elif fetch == 'Z': - self[fetch] = self.__seqToRE(self.locale_time.timezone, + self[fetch] = self.__seqToRE(self.locale_time.timezone, fetch) elif fetch == '%': return '%' return super(TimeRE,self).__getitem__(fetch) - + def __seqToRE(self, to_convert, directive): """Convert a list to a regex string for matching directive.""" def sorter(a, b): """Sort based on length. - + Done in case for some strange reason that names in the locale only differ by a suffix and thus want the name with the suffix to match first. - + """ try: a_length = len(a) except TypeError: a_length = 0 try: b_length = len(b) except TypeError: b_length = 0 return cmp(b_length, a_length) - + to_convert = to_convert[:] #Don't want to change value in-place. to_convert.sort(sorter) regex = '(?P<%s>' % directive @@ -357,7 +357,7 @@ class TimeRE(dict): format = format.replace(whitespace, r'\s*') while format.find('%') != -1: directive_index = format.index('%')+1 - processed_format = "%s%s%s" % (processed_format, + processed_format = "%s%s%s" % (processed_format, format[:directive_index-1], self[format[directive_index]]) format = format[directive_index+1:] @@ -371,12 +371,12 @@ class TimeRE(dict): def strptime(data_string, format="%a %b %d %H:%M:%S %Y"): """Convert data_string to a time struct based on the format string or re object; will return an re object for format if data_string is False. - - The object passed in for format may either be a re object compiled by - strptime() or a format string. If False is passed in for data_string - then an re object for format will be returned. The re object + + The object passed in for format may either be a re object compiled by + strptime() or a format string. If False is passed in for data_string + then an re object for format will be returned. The re object must be used with the same language as used to compile the re object. - + """ locale_time = LocaleTime() if isinstance(format, type(re_compile(''))): diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py index e3b97ae..9fc96d7 100644 --- a/Lib/encodings/__init__.py +++ b/Lib/encodings/__init__.py @@ -39,7 +39,7 @@ class CodecRegistryError(exceptions.LookupError, pass def search_function(encoding): - + # Cache lookup entry = _cache.get(encoding, _unknown) if entry is not _unknown: @@ -72,8 +72,8 @@ def search_function(encoding): if mod is None: # Cache misses _cache[encoding] = None - return None - + return None + # Now ask the module for the registry entry entry = tuple(getregentry()) if len(entry) != 4: diff --git a/Lib/encodings/ascii.py b/Lib/encodings/ascii.py index b92ff5e..05fc36a 100644 --- a/Lib/encodings/ascii.py +++ b/Lib/encodings/ascii.py @@ -19,7 +19,7 @@ class Codec(codecs.Codec): class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass diff --git a/Lib/encodings/base64_codec.py b/Lib/encodings/base64_codec.py index bae9542..085ab14 100644 --- a/Lib/encodings/base64_codec.py +++ b/Lib/encodings/base64_codec.py @@ -51,7 +51,7 @@ class Codec(codecs.Codec): class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass diff --git a/Lib/encodings/charmap.py b/Lib/encodings/charmap.py index 5f5b9cc..de33b12 100644 --- a/Lib/encodings/charmap.py +++ b/Lib/encodings/charmap.py @@ -2,7 +2,7 @@ Use this codec directly rather than through the automatic conversion mechanisms supplied by unicode() and .encode(). - + Written by Marc-Andre Lemburg (mal@lemburg.com). @@ -31,7 +31,7 @@ class StreamWriter(Codec,codecs.StreamWriter): def encode(self,input,errors='strict'): return Codec.encode(input,errors,self.mapping) - + class StreamReader(Codec,codecs.StreamReader): def __init__(self,stream,errors='strict',mapping=None): @@ -48,4 +48,3 @@ class StreamReader(Codec,codecs.StreamReader): def getregentry(): return (Codec.encode,Codec.decode,StreamReader,StreamWriter) - diff --git a/Lib/encodings/cp037.py b/Lib/encodings/cp037.py index 3b641bb..42c5b1f 100644 --- a/Lib/encodings/cp037.py +++ b/Lib/encodings/cp037.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,242 +37,242 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0004: 0x009c, # CONTROL - 0x0005: 0x0009, # HORIZONTAL TABULATION - 0x0006: 0x0086, # CONTROL - 0x0007: 0x007f, # DELETE - 0x0008: 0x0097, # CONTROL - 0x0009: 0x008d, # CONTROL - 0x000a: 0x008e, # CONTROL - 0x0014: 0x009d, # CONTROL - 0x0015: 0x0085, # CONTROL - 0x0016: 0x0008, # BACKSPACE - 0x0017: 0x0087, # CONTROL - 0x001a: 0x0092, # CONTROL - 0x001b: 0x008f, # CONTROL - 0x0020: 0x0080, # CONTROL - 0x0021: 0x0081, # CONTROL - 0x0022: 0x0082, # CONTROL - 0x0023: 0x0083, # CONTROL - 0x0024: 0x0084, # CONTROL - 0x0025: 0x000a, # LINE FEED - 0x0026: 0x0017, # END OF TRANSMISSION BLOCK - 0x0027: 0x001b, # ESCAPE - 0x0028: 0x0088, # CONTROL - 0x0029: 0x0089, # CONTROL - 0x002a: 0x008a, # CONTROL - 0x002b: 0x008b, # CONTROL - 0x002c: 0x008c, # CONTROL - 0x002d: 0x0005, # ENQUIRY - 0x002e: 0x0006, # ACKNOWLEDGE - 0x002f: 0x0007, # BELL - 0x0030: 0x0090, # CONTROL - 0x0031: 0x0091, # CONTROL - 0x0032: 0x0016, # SYNCHRONOUS IDLE - 0x0033: 0x0093, # CONTROL - 0x0034: 0x0094, # CONTROL - 0x0035: 0x0095, # CONTROL - 0x0036: 0x0096, # CONTROL - 0x0037: 0x0004, # END OF TRANSMISSION - 0x0038: 0x0098, # CONTROL - 0x0039: 0x0099, # CONTROL - 0x003a: 0x009a, # CONTROL - 0x003b: 0x009b, # CONTROL - 0x003c: 0x0014, # DEVICE CONTROL FOUR - 0x003d: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x003e: 0x009e, # CONTROL - 0x003f: 0x001a, # SUBSTITUTE - 0x0040: 0x0020, # SPACE - 0x0041: 0x00a0, # NO-BREAK SPACE - 0x0042: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0043: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0044: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0045: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x0046: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x0047: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0048: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0049: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x004a: 0x00a2, # CENT SIGN - 0x004b: 0x002e, # FULL STOP - 0x004c: 0x003c, # LESS-THAN SIGN - 0x004d: 0x0028, # LEFT PARENTHESIS - 0x004e: 0x002b, # PLUS SIGN - 0x004f: 0x007c, # VERTICAL LINE - 0x0050: 0x0026, # AMPERSAND - 0x0051: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0052: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0053: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x0054: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x0055: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x0056: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x0057: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x0058: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x0059: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x005a: 0x0021, # EXCLAMATION MARK - 0x005b: 0x0024, # DOLLAR SIGN - 0x005c: 0x002a, # ASTERISK - 0x005d: 0x0029, # RIGHT PARENTHESIS - 0x005e: 0x003b, # SEMICOLON - 0x005f: 0x00ac, # NOT SIGN - 0x0060: 0x002d, # HYPHEN-MINUS - 0x0061: 0x002f, # SOLIDUS - 0x0062: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x0063: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x0064: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x0065: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x0066: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x0067: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0068: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0069: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x006a: 0x00a6, # BROKEN BAR - 0x006b: 0x002c, # COMMA - 0x006c: 0x0025, # PERCENT SIGN - 0x006d: 0x005f, # LOW LINE - 0x006e: 0x003e, # GREATER-THAN SIGN - 0x006f: 0x003f, # QUESTION MARK - 0x0070: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x0071: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0072: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x0073: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x0074: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x0075: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x0076: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x0077: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x0078: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x0079: 0x0060, # GRAVE ACCENT - 0x007a: 0x003a, # COLON - 0x007b: 0x0023, # NUMBER SIGN - 0x007c: 0x0040, # COMMERCIAL AT - 0x007d: 0x0027, # APOSTROPHE - 0x007e: 0x003d, # EQUALS SIGN - 0x007f: 0x0022, # QUOTATION MARK - 0x0080: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x0081: 0x0061, # LATIN SMALL LETTER A - 0x0082: 0x0062, # LATIN SMALL LETTER B - 0x0083: 0x0063, # LATIN SMALL LETTER C - 0x0084: 0x0064, # LATIN SMALL LETTER D - 0x0085: 0x0065, # LATIN SMALL LETTER E - 0x0086: 0x0066, # LATIN SMALL LETTER F - 0x0087: 0x0067, # LATIN SMALL LETTER G - 0x0088: 0x0068, # LATIN SMALL LETTER H - 0x0089: 0x0069, # LATIN SMALL LETTER I - 0x008a: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x008b: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x008c: 0x00f0, # LATIN SMALL LETTER ETH (ICELANDIC) - 0x008d: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x008e: 0x00fe, # LATIN SMALL LETTER THORN (ICELANDIC) - 0x008f: 0x00b1, # PLUS-MINUS SIGN - 0x0090: 0x00b0, # DEGREE SIGN - 0x0091: 0x006a, # LATIN SMALL LETTER J - 0x0092: 0x006b, # LATIN SMALL LETTER K - 0x0093: 0x006c, # LATIN SMALL LETTER L - 0x0094: 0x006d, # LATIN SMALL LETTER M - 0x0095: 0x006e, # LATIN SMALL LETTER N - 0x0096: 0x006f, # LATIN SMALL LETTER O - 0x0097: 0x0070, # LATIN SMALL LETTER P - 0x0098: 0x0071, # LATIN SMALL LETTER Q - 0x0099: 0x0072, # LATIN SMALL LETTER R - 0x009a: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x009b: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x009c: 0x00e6, # LATIN SMALL LIGATURE AE - 0x009d: 0x00b8, # CEDILLA - 0x009e: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x009f: 0x00a4, # CURRENCY SIGN - 0x00a0: 0x00b5, # MICRO SIGN - 0x00a1: 0x007e, # TILDE - 0x00a2: 0x0073, # LATIN SMALL LETTER S - 0x00a3: 0x0074, # LATIN SMALL LETTER T - 0x00a4: 0x0075, # LATIN SMALL LETTER U - 0x00a5: 0x0076, # LATIN SMALL LETTER V - 0x00a6: 0x0077, # LATIN SMALL LETTER W - 0x00a7: 0x0078, # LATIN SMALL LETTER X - 0x00a8: 0x0079, # LATIN SMALL LETTER Y - 0x00a9: 0x007a, # LATIN SMALL LETTER Z - 0x00aa: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ab: 0x00bf, # INVERTED QUESTION MARK - 0x00ac: 0x00d0, # LATIN CAPITAL LETTER ETH (ICELANDIC) - 0x00ad: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00ae: 0x00de, # LATIN CAPITAL LETTER THORN (ICELANDIC) - 0x00af: 0x00ae, # REGISTERED SIGN - 0x00b0: 0x005e, # CIRCUMFLEX ACCENT - 0x00b1: 0x00a3, # POUND SIGN - 0x00b2: 0x00a5, # YEN SIGN - 0x00b3: 0x00b7, # MIDDLE DOT - 0x00b4: 0x00a9, # COPYRIGHT SIGN - 0x00b5: 0x00a7, # SECTION SIGN - 0x00b7: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00b8: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00b9: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00ba: 0x005b, # LEFT SQUARE BRACKET - 0x00bb: 0x005d, # RIGHT SQUARE BRACKET - 0x00bc: 0x00af, # MACRON - 0x00bd: 0x00a8, # DIAERESIS - 0x00be: 0x00b4, # ACUTE ACCENT - 0x00bf: 0x00d7, # MULTIPLICATION SIGN - 0x00c0: 0x007b, # LEFT CURLY BRACKET - 0x00c1: 0x0041, # LATIN CAPITAL LETTER A - 0x00c2: 0x0042, # LATIN CAPITAL LETTER B - 0x00c3: 0x0043, # LATIN CAPITAL LETTER C - 0x00c4: 0x0044, # LATIN CAPITAL LETTER D - 0x00c5: 0x0045, # LATIN CAPITAL LETTER E - 0x00c6: 0x0046, # LATIN CAPITAL LETTER F - 0x00c7: 0x0047, # LATIN CAPITAL LETTER G - 0x00c8: 0x0048, # LATIN CAPITAL LETTER H - 0x00c9: 0x0049, # LATIN CAPITAL LETTER I - 0x00ca: 0x00ad, # SOFT HYPHEN - 0x00cb: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00cc: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00cd: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x00ce: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00cf: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00d0: 0x007d, # RIGHT CURLY BRACKET - 0x00d1: 0x004a, # LATIN CAPITAL LETTER J - 0x00d2: 0x004b, # LATIN CAPITAL LETTER K - 0x00d3: 0x004c, # LATIN CAPITAL LETTER L - 0x00d4: 0x004d, # LATIN CAPITAL LETTER M - 0x00d5: 0x004e, # LATIN CAPITAL LETTER N - 0x00d6: 0x004f, # LATIN CAPITAL LETTER O - 0x00d7: 0x0050, # LATIN CAPITAL LETTER P - 0x00d8: 0x0051, # LATIN CAPITAL LETTER Q - 0x00d9: 0x0052, # LATIN CAPITAL LETTER R - 0x00da: 0x00b9, # SUPERSCRIPT ONE - 0x00db: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00dc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00dd: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x00de: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00df: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x00e0: 0x005c, # REVERSE SOLIDUS - 0x00e1: 0x00f7, # DIVISION SIGN - 0x00e2: 0x0053, # LATIN CAPITAL LETTER S - 0x00e3: 0x0054, # LATIN CAPITAL LETTER T - 0x00e4: 0x0055, # LATIN CAPITAL LETTER U - 0x00e5: 0x0056, # LATIN CAPITAL LETTER V - 0x00e6: 0x0057, # LATIN CAPITAL LETTER W - 0x00e7: 0x0058, # LATIN CAPITAL LETTER X - 0x00e8: 0x0059, # LATIN CAPITAL LETTER Y - 0x00e9: 0x005a, # LATIN CAPITAL LETTER Z - 0x00ea: 0x00b2, # SUPERSCRIPT TWO - 0x00eb: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00ec: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00ed: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00ef: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00f0: 0x0030, # DIGIT ZERO - 0x00f1: 0x0031, # DIGIT ONE - 0x00f2: 0x0032, # DIGIT TWO - 0x00f3: 0x0033, # DIGIT THREE - 0x00f4: 0x0034, # DIGIT FOUR - 0x00f5: 0x0035, # DIGIT FIVE - 0x00f6: 0x0036, # DIGIT SIX - 0x00f7: 0x0037, # DIGIT SEVEN - 0x00f8: 0x0038, # DIGIT EIGHT - 0x00f9: 0x0039, # DIGIT NINE - 0x00fa: 0x00b3, # SUPERSCRIPT THREE - 0x00fb: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ff: 0x009f, # CONTROL + 0x0004: 0x009c, # CONTROL + 0x0005: 0x0009, # HORIZONTAL TABULATION + 0x0006: 0x0086, # CONTROL + 0x0007: 0x007f, # DELETE + 0x0008: 0x0097, # CONTROL + 0x0009: 0x008d, # CONTROL + 0x000a: 0x008e, # CONTROL + 0x0014: 0x009d, # CONTROL + 0x0015: 0x0085, # CONTROL + 0x0016: 0x0008, # BACKSPACE + 0x0017: 0x0087, # CONTROL + 0x001a: 0x0092, # CONTROL + 0x001b: 0x008f, # CONTROL + 0x0020: 0x0080, # CONTROL + 0x0021: 0x0081, # CONTROL + 0x0022: 0x0082, # CONTROL + 0x0023: 0x0083, # CONTROL + 0x0024: 0x0084, # CONTROL + 0x0025: 0x000a, # LINE FEED + 0x0026: 0x0017, # END OF TRANSMISSION BLOCK + 0x0027: 0x001b, # ESCAPE + 0x0028: 0x0088, # CONTROL + 0x0029: 0x0089, # CONTROL + 0x002a: 0x008a, # CONTROL + 0x002b: 0x008b, # CONTROL + 0x002c: 0x008c, # CONTROL + 0x002d: 0x0005, # ENQUIRY + 0x002e: 0x0006, # ACKNOWLEDGE + 0x002f: 0x0007, # BELL + 0x0030: 0x0090, # CONTROL + 0x0031: 0x0091, # CONTROL + 0x0032: 0x0016, # SYNCHRONOUS IDLE + 0x0033: 0x0093, # CONTROL + 0x0034: 0x0094, # CONTROL + 0x0035: 0x0095, # CONTROL + 0x0036: 0x0096, # CONTROL + 0x0037: 0x0004, # END OF TRANSMISSION + 0x0038: 0x0098, # CONTROL + 0x0039: 0x0099, # CONTROL + 0x003a: 0x009a, # CONTROL + 0x003b: 0x009b, # CONTROL + 0x003c: 0x0014, # DEVICE CONTROL FOUR + 0x003d: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x003e: 0x009e, # CONTROL + 0x003f: 0x001a, # SUBSTITUTE + 0x0040: 0x0020, # SPACE + 0x0041: 0x00a0, # NO-BREAK SPACE + 0x0042: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0043: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0044: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0045: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x0046: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x0047: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0048: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0049: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x004a: 0x00a2, # CENT SIGN + 0x004b: 0x002e, # FULL STOP + 0x004c: 0x003c, # LESS-THAN SIGN + 0x004d: 0x0028, # LEFT PARENTHESIS + 0x004e: 0x002b, # PLUS SIGN + 0x004f: 0x007c, # VERTICAL LINE + 0x0050: 0x0026, # AMPERSAND + 0x0051: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0052: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0053: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x0054: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x0055: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x0056: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x0057: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x0058: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x0059: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x005a: 0x0021, # EXCLAMATION MARK + 0x005b: 0x0024, # DOLLAR SIGN + 0x005c: 0x002a, # ASTERISK + 0x005d: 0x0029, # RIGHT PARENTHESIS + 0x005e: 0x003b, # SEMICOLON + 0x005f: 0x00ac, # NOT SIGN + 0x0060: 0x002d, # HYPHEN-MINUS + 0x0061: 0x002f, # SOLIDUS + 0x0062: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x0063: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x0064: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x0065: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x0066: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x0067: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0068: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0069: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x006a: 0x00a6, # BROKEN BAR + 0x006b: 0x002c, # COMMA + 0x006c: 0x0025, # PERCENT SIGN + 0x006d: 0x005f, # LOW LINE + 0x006e: 0x003e, # GREATER-THAN SIGN + 0x006f: 0x003f, # QUESTION MARK + 0x0070: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x0071: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0072: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x0073: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x0074: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x0075: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x0076: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x0077: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x0078: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x0079: 0x0060, # GRAVE ACCENT + 0x007a: 0x003a, # COLON + 0x007b: 0x0023, # NUMBER SIGN + 0x007c: 0x0040, # COMMERCIAL AT + 0x007d: 0x0027, # APOSTROPHE + 0x007e: 0x003d, # EQUALS SIGN + 0x007f: 0x0022, # QUOTATION MARK + 0x0080: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x0081: 0x0061, # LATIN SMALL LETTER A + 0x0082: 0x0062, # LATIN SMALL LETTER B + 0x0083: 0x0063, # LATIN SMALL LETTER C + 0x0084: 0x0064, # LATIN SMALL LETTER D + 0x0085: 0x0065, # LATIN SMALL LETTER E + 0x0086: 0x0066, # LATIN SMALL LETTER F + 0x0087: 0x0067, # LATIN SMALL LETTER G + 0x0088: 0x0068, # LATIN SMALL LETTER H + 0x0089: 0x0069, # LATIN SMALL LETTER I + 0x008a: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x008b: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x008c: 0x00f0, # LATIN SMALL LETTER ETH (ICELANDIC) + 0x008d: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x008e: 0x00fe, # LATIN SMALL LETTER THORN (ICELANDIC) + 0x008f: 0x00b1, # PLUS-MINUS SIGN + 0x0090: 0x00b0, # DEGREE SIGN + 0x0091: 0x006a, # LATIN SMALL LETTER J + 0x0092: 0x006b, # LATIN SMALL LETTER K + 0x0093: 0x006c, # LATIN SMALL LETTER L + 0x0094: 0x006d, # LATIN SMALL LETTER M + 0x0095: 0x006e, # LATIN SMALL LETTER N + 0x0096: 0x006f, # LATIN SMALL LETTER O + 0x0097: 0x0070, # LATIN SMALL LETTER P + 0x0098: 0x0071, # LATIN SMALL LETTER Q + 0x0099: 0x0072, # LATIN SMALL LETTER R + 0x009a: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x009b: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x009c: 0x00e6, # LATIN SMALL LIGATURE AE + 0x009d: 0x00b8, # CEDILLA + 0x009e: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x009f: 0x00a4, # CURRENCY SIGN + 0x00a0: 0x00b5, # MICRO SIGN + 0x00a1: 0x007e, # TILDE + 0x00a2: 0x0073, # LATIN SMALL LETTER S + 0x00a3: 0x0074, # LATIN SMALL LETTER T + 0x00a4: 0x0075, # LATIN SMALL LETTER U + 0x00a5: 0x0076, # LATIN SMALL LETTER V + 0x00a6: 0x0077, # LATIN SMALL LETTER W + 0x00a7: 0x0078, # LATIN SMALL LETTER X + 0x00a8: 0x0079, # LATIN SMALL LETTER Y + 0x00a9: 0x007a, # LATIN SMALL LETTER Z + 0x00aa: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ab: 0x00bf, # INVERTED QUESTION MARK + 0x00ac: 0x00d0, # LATIN CAPITAL LETTER ETH (ICELANDIC) + 0x00ad: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00ae: 0x00de, # LATIN CAPITAL LETTER THORN (ICELANDIC) + 0x00af: 0x00ae, # REGISTERED SIGN + 0x00b0: 0x005e, # CIRCUMFLEX ACCENT + 0x00b1: 0x00a3, # POUND SIGN + 0x00b2: 0x00a5, # YEN SIGN + 0x00b3: 0x00b7, # MIDDLE DOT + 0x00b4: 0x00a9, # COPYRIGHT SIGN + 0x00b5: 0x00a7, # SECTION SIGN + 0x00b7: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00b8: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00b9: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00ba: 0x005b, # LEFT SQUARE BRACKET + 0x00bb: 0x005d, # RIGHT SQUARE BRACKET + 0x00bc: 0x00af, # MACRON + 0x00bd: 0x00a8, # DIAERESIS + 0x00be: 0x00b4, # ACUTE ACCENT + 0x00bf: 0x00d7, # MULTIPLICATION SIGN + 0x00c0: 0x007b, # LEFT CURLY BRACKET + 0x00c1: 0x0041, # LATIN CAPITAL LETTER A + 0x00c2: 0x0042, # LATIN CAPITAL LETTER B + 0x00c3: 0x0043, # LATIN CAPITAL LETTER C + 0x00c4: 0x0044, # LATIN CAPITAL LETTER D + 0x00c5: 0x0045, # LATIN CAPITAL LETTER E + 0x00c6: 0x0046, # LATIN CAPITAL LETTER F + 0x00c7: 0x0047, # LATIN CAPITAL LETTER G + 0x00c8: 0x0048, # LATIN CAPITAL LETTER H + 0x00c9: 0x0049, # LATIN CAPITAL LETTER I + 0x00ca: 0x00ad, # SOFT HYPHEN + 0x00cb: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00cc: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00cd: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x00ce: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00cf: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00d0: 0x007d, # RIGHT CURLY BRACKET + 0x00d1: 0x004a, # LATIN CAPITAL LETTER J + 0x00d2: 0x004b, # LATIN CAPITAL LETTER K + 0x00d3: 0x004c, # LATIN CAPITAL LETTER L + 0x00d4: 0x004d, # LATIN CAPITAL LETTER M + 0x00d5: 0x004e, # LATIN CAPITAL LETTER N + 0x00d6: 0x004f, # LATIN CAPITAL LETTER O + 0x00d7: 0x0050, # LATIN CAPITAL LETTER P + 0x00d8: 0x0051, # LATIN CAPITAL LETTER Q + 0x00d9: 0x0052, # LATIN CAPITAL LETTER R + 0x00da: 0x00b9, # SUPERSCRIPT ONE + 0x00db: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00dd: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x00de: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00df: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x00e0: 0x005c, # REVERSE SOLIDUS + 0x00e1: 0x00f7, # DIVISION SIGN + 0x00e2: 0x0053, # LATIN CAPITAL LETTER S + 0x00e3: 0x0054, # LATIN CAPITAL LETTER T + 0x00e4: 0x0055, # LATIN CAPITAL LETTER U + 0x00e5: 0x0056, # LATIN CAPITAL LETTER V + 0x00e6: 0x0057, # LATIN CAPITAL LETTER W + 0x00e7: 0x0058, # LATIN CAPITAL LETTER X + 0x00e8: 0x0059, # LATIN CAPITAL LETTER Y + 0x00e9: 0x005a, # LATIN CAPITAL LETTER Z + 0x00ea: 0x00b2, # SUPERSCRIPT TWO + 0x00eb: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00ec: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00ed: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00ef: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00f0: 0x0030, # DIGIT ZERO + 0x00f1: 0x0031, # DIGIT ONE + 0x00f2: 0x0032, # DIGIT TWO + 0x00f3: 0x0033, # DIGIT THREE + 0x00f4: 0x0034, # DIGIT FOUR + 0x00f5: 0x0035, # DIGIT FIVE + 0x00f6: 0x0036, # DIGIT SIX + 0x00f7: 0x0037, # DIGIT SEVEN + 0x00f8: 0x0038, # DIGIT EIGHT + 0x00f9: 0x0039, # DIGIT NINE + 0x00fa: 0x00b3, # SUPERSCRIPT THREE + 0x00fb: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00ff: 0x009f, # CONTROL }) ### Encoding Map diff --git a/Lib/encodings/cp1006.py b/Lib/encodings/cp1006.py index 0a3c3af..bbd3d87 100644 --- a/Lib/encodings/cp1006.py +++ b/Lib/encodings/cp1006.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,100 +37,100 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x06f0, # EXTENDED ARABIC-INDIC DIGIT ZERO - 0x00a2: 0x06f1, # EXTENDED ARABIC-INDIC DIGIT ONE - 0x00a3: 0x06f2, # EXTENDED ARABIC-INDIC DIGIT TWO - 0x00a4: 0x06f3, # EXTENDED ARABIC-INDIC DIGIT THREE - 0x00a5: 0x06f4, # EXTENDED ARABIC-INDIC DIGIT FOUR - 0x00a6: 0x06f5, # EXTENDED ARABIC-INDIC DIGIT FIVE - 0x00a7: 0x06f6, # EXTENDED ARABIC-INDIC DIGIT SIX - 0x00a8: 0x06f7, # EXTENDED ARABIC-INDIC DIGIT SEVEN - 0x00a9: 0x06f8, # EXTENDED ARABIC-INDIC DIGIT EIGHT - 0x00aa: 0x06f9, # EXTENDED ARABIC-INDIC DIGIT NINE - 0x00ab: 0x060c, # ARABIC COMMA - 0x00ac: 0x061b, # ARABIC SEMICOLON - 0x00ae: 0x061f, # ARABIC QUESTION MARK - 0x00af: 0xfe81, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM - 0x00b0: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM - 0x00b1: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM - 0x00b2: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM - 0x00b3: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM - 0x00b4: 0xfe91, # ARABIC LETTER BEH INITIAL FORM - 0x00b5: 0xfb56, # ARABIC LETTER PEH ISOLATED FORM - 0x00b6: 0xfb58, # ARABIC LETTER PEH INITIAL FORM - 0x00b7: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED FORM - 0x00b8: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM - 0x00b9: 0xfe97, # ARABIC LETTER TEH INITIAL FORM - 0x00ba: 0xfb66, # ARABIC LETTER TTEH ISOLATED FORM - 0x00bb: 0xfb68, # ARABIC LETTER TTEH INITIAL FORM - 0x00bc: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM - 0x00bd: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM - 0x00be: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM - 0x00bf: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM - 0x00c0: 0xfb7a, # ARABIC LETTER TCHEH ISOLATED FORM - 0x00c1: 0xfb7c, # ARABIC LETTER TCHEH INITIAL FORM - 0x00c2: 0xfea1, # ARABIC LETTER HAH ISOLATED FORM - 0x00c3: 0xfea3, # ARABIC LETTER HAH INITIAL FORM - 0x00c4: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM - 0x00c5: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM - 0x00c6: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM - 0x00c7: 0xfb84, # ARABIC LETTER DAHAL ISOLATED FORMN - 0x00c8: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM - 0x00c9: 0xfead, # ARABIC LETTER REH ISOLATED FORM - 0x00ca: 0xfb8c, # ARABIC LETTER RREH ISOLATED FORM - 0x00cb: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM - 0x00cc: 0xfb8a, # ARABIC LETTER JEH ISOLATED FORM - 0x00cd: 0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM - 0x00ce: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM - 0x00cf: 0xfeb5, # ARABIC LETTER SHEEN ISOLATED FORM - 0x00d0: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM - 0x00d1: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM - 0x00d2: 0xfebb, # ARABIC LETTER SAD INITIAL FORM - 0x00d3: 0xfebd, # ARABIC LETTER DAD ISOLATED FORM - 0x00d4: 0xfebf, # ARABIC LETTER DAD INITIAL FORM - 0x00d5: 0xfec1, # ARABIC LETTER TAH ISOLATED FORM - 0x00d6: 0xfec5, # ARABIC LETTER ZAH ISOLATED FORM - 0x00d7: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM - 0x00d8: 0xfeca, # ARABIC LETTER AIN FINAL FORM - 0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM - 0x00da: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM - 0x00db: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM - 0x00dc: 0xfece, # ARABIC LETTER GHAIN FINAL FORM - 0x00dd: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM - 0x00de: 0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM - 0x00df: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM - 0x00e0: 0xfed3, # ARABIC LETTER FEH INITIAL FORM - 0x00e1: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM - 0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM - 0x00e3: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM - 0x00e4: 0xfedb, # ARABIC LETTER KAF INITIAL FORM - 0x00e5: 0xfb92, # ARABIC LETTER GAF ISOLATED FORM - 0x00e6: 0xfb94, # ARABIC LETTER GAF INITIAL FORM - 0x00e7: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM - 0x00e8: 0xfedf, # ARABIC LETTER LAM INITIAL FORM - 0x00e9: 0xfee0, # ARABIC LETTER LAM MEDIAL FORM - 0x00ea: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM - 0x00eb: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM - 0x00ec: 0xfb9e, # ARABIC LETTER NOON GHUNNA ISOLATED FORM - 0x00ed: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM - 0x00ee: 0xfee7, # ARABIC LETTER NOON INITIAL FORM - 0x00ef: 0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM - 0x00f0: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM - 0x00f1: 0xfba6, # ARABIC LETTER HEH GOAL ISOLATED FORM - 0x00f2: 0xfba8, # ARABIC LETTER HEH GOAL INITIAL FORM - 0x00f3: 0xfba9, # ARABIC LETTER HEH GOAL MEDIAL FORM - 0x00f4: 0xfbaa, # ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM - 0x00f5: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM - 0x00f6: 0xfe89, # ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM - 0x00f7: 0xfe8a, # ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM - 0x00f8: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM - 0x00f9: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM - 0x00fa: 0xfef2, # ARABIC LETTER YEH FINAL FORM - 0x00fb: 0xfef3, # ARABIC LETTER YEH INITIAL FORM - 0x00fc: 0xfbb0, # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM - 0x00fd: 0xfbae, # ARABIC LETTER YEH BARREE ISOLATED FORM - 0x00fe: 0xfe7c, # ARABIC SHADDA ISOLATED FORM - 0x00ff: 0xfe7d, # ARABIC SHADDA MEDIAL FORM + 0x00a1: 0x06f0, # EXTENDED ARABIC-INDIC DIGIT ZERO + 0x00a2: 0x06f1, # EXTENDED ARABIC-INDIC DIGIT ONE + 0x00a3: 0x06f2, # EXTENDED ARABIC-INDIC DIGIT TWO + 0x00a4: 0x06f3, # EXTENDED ARABIC-INDIC DIGIT THREE + 0x00a5: 0x06f4, # EXTENDED ARABIC-INDIC DIGIT FOUR + 0x00a6: 0x06f5, # EXTENDED ARABIC-INDIC DIGIT FIVE + 0x00a7: 0x06f6, # EXTENDED ARABIC-INDIC DIGIT SIX + 0x00a8: 0x06f7, # EXTENDED ARABIC-INDIC DIGIT SEVEN + 0x00a9: 0x06f8, # EXTENDED ARABIC-INDIC DIGIT EIGHT + 0x00aa: 0x06f9, # EXTENDED ARABIC-INDIC DIGIT NINE + 0x00ab: 0x060c, # ARABIC COMMA + 0x00ac: 0x061b, # ARABIC SEMICOLON + 0x00ae: 0x061f, # ARABIC QUESTION MARK + 0x00af: 0xfe81, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + 0x00b0: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM + 0x00b1: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM + 0x00b2: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM + 0x00b3: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM + 0x00b4: 0xfe91, # ARABIC LETTER BEH INITIAL FORM + 0x00b5: 0xfb56, # ARABIC LETTER PEH ISOLATED FORM + 0x00b6: 0xfb58, # ARABIC LETTER PEH INITIAL FORM + 0x00b7: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED FORM + 0x00b8: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM + 0x00b9: 0xfe97, # ARABIC LETTER TEH INITIAL FORM + 0x00ba: 0xfb66, # ARABIC LETTER TTEH ISOLATED FORM + 0x00bb: 0xfb68, # ARABIC LETTER TTEH INITIAL FORM + 0x00bc: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM + 0x00bd: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM + 0x00be: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM + 0x00bf: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM + 0x00c0: 0xfb7a, # ARABIC LETTER TCHEH ISOLATED FORM + 0x00c1: 0xfb7c, # ARABIC LETTER TCHEH INITIAL FORM + 0x00c2: 0xfea1, # ARABIC LETTER HAH ISOLATED FORM + 0x00c3: 0xfea3, # ARABIC LETTER HAH INITIAL FORM + 0x00c4: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM + 0x00c5: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM + 0x00c6: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM + 0x00c7: 0xfb84, # ARABIC LETTER DAHAL ISOLATED FORMN + 0x00c8: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM + 0x00c9: 0xfead, # ARABIC LETTER REH ISOLATED FORM + 0x00ca: 0xfb8c, # ARABIC LETTER RREH ISOLATED FORM + 0x00cb: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM + 0x00cc: 0xfb8a, # ARABIC LETTER JEH ISOLATED FORM + 0x00cd: 0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM + 0x00ce: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM + 0x00cf: 0xfeb5, # ARABIC LETTER SHEEN ISOLATED FORM + 0x00d0: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM + 0x00d1: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM + 0x00d2: 0xfebb, # ARABIC LETTER SAD INITIAL FORM + 0x00d3: 0xfebd, # ARABIC LETTER DAD ISOLATED FORM + 0x00d4: 0xfebf, # ARABIC LETTER DAD INITIAL FORM + 0x00d5: 0xfec1, # ARABIC LETTER TAH ISOLATED FORM + 0x00d6: 0xfec5, # ARABIC LETTER ZAH ISOLATED FORM + 0x00d7: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM + 0x00d8: 0xfeca, # ARABIC LETTER AIN FINAL FORM + 0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM + 0x00da: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM + 0x00db: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM + 0x00dc: 0xfece, # ARABIC LETTER GHAIN FINAL FORM + 0x00dd: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM + 0x00de: 0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM + 0x00df: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM + 0x00e0: 0xfed3, # ARABIC LETTER FEH INITIAL FORM + 0x00e1: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM + 0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM + 0x00e3: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM + 0x00e4: 0xfedb, # ARABIC LETTER KAF INITIAL FORM + 0x00e5: 0xfb92, # ARABIC LETTER GAF ISOLATED FORM + 0x00e6: 0xfb94, # ARABIC LETTER GAF INITIAL FORM + 0x00e7: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM + 0x00e8: 0xfedf, # ARABIC LETTER LAM INITIAL FORM + 0x00e9: 0xfee0, # ARABIC LETTER LAM MEDIAL FORM + 0x00ea: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM + 0x00eb: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM + 0x00ec: 0xfb9e, # ARABIC LETTER NOON GHUNNA ISOLATED FORM + 0x00ed: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM + 0x00ee: 0xfee7, # ARABIC LETTER NOON INITIAL FORM + 0x00ef: 0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + 0x00f0: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM + 0x00f1: 0xfba6, # ARABIC LETTER HEH GOAL ISOLATED FORM + 0x00f2: 0xfba8, # ARABIC LETTER HEH GOAL INITIAL FORM + 0x00f3: 0xfba9, # ARABIC LETTER HEH GOAL MEDIAL FORM + 0x00f4: 0xfbaa, # ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM + 0x00f5: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM + 0x00f6: 0xfe89, # ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM + 0x00f7: 0xfe8a, # ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM + 0x00f8: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + 0x00f9: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM + 0x00fa: 0xfef2, # ARABIC LETTER YEH FINAL FORM + 0x00fb: 0xfef3, # ARABIC LETTER YEH INITIAL FORM + 0x00fc: 0xfbb0, # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM + 0x00fd: 0xfbae, # ARABIC LETTER YEH BARREE ISOLATED FORM + 0x00fe: 0xfe7c, # ARABIC SHADDA ISOLATED FORM + 0x00ff: 0xfe7d, # ARABIC SHADDA MEDIAL FORM }) ### Encoding Map diff --git a/Lib/encodings/cp1026.py b/Lib/encodings/cp1026.py index cce5af6..3aec91b 100644 --- a/Lib/encodings/cp1026.py +++ b/Lib/encodings/cp1026.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,242 +37,242 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0004: 0x009c, # CONTROL - 0x0005: 0x0009, # HORIZONTAL TABULATION - 0x0006: 0x0086, # CONTROL - 0x0007: 0x007f, # DELETE - 0x0008: 0x0097, # CONTROL - 0x0009: 0x008d, # CONTROL - 0x000a: 0x008e, # CONTROL - 0x0014: 0x009d, # CONTROL - 0x0015: 0x0085, # CONTROL - 0x0016: 0x0008, # BACKSPACE - 0x0017: 0x0087, # CONTROL - 0x001a: 0x0092, # CONTROL - 0x001b: 0x008f, # CONTROL - 0x0020: 0x0080, # CONTROL - 0x0021: 0x0081, # CONTROL - 0x0022: 0x0082, # CONTROL - 0x0023: 0x0083, # CONTROL - 0x0024: 0x0084, # CONTROL - 0x0025: 0x000a, # LINE FEED - 0x0026: 0x0017, # END OF TRANSMISSION BLOCK - 0x0027: 0x001b, # ESCAPE - 0x0028: 0x0088, # CONTROL - 0x0029: 0x0089, # CONTROL - 0x002a: 0x008a, # CONTROL - 0x002b: 0x008b, # CONTROL - 0x002c: 0x008c, # CONTROL - 0x002d: 0x0005, # ENQUIRY - 0x002e: 0x0006, # ACKNOWLEDGE - 0x002f: 0x0007, # BELL - 0x0030: 0x0090, # CONTROL - 0x0031: 0x0091, # CONTROL - 0x0032: 0x0016, # SYNCHRONOUS IDLE - 0x0033: 0x0093, # CONTROL - 0x0034: 0x0094, # CONTROL - 0x0035: 0x0095, # CONTROL - 0x0036: 0x0096, # CONTROL - 0x0037: 0x0004, # END OF TRANSMISSION - 0x0038: 0x0098, # CONTROL - 0x0039: 0x0099, # CONTROL - 0x003a: 0x009a, # CONTROL - 0x003b: 0x009b, # CONTROL - 0x003c: 0x0014, # DEVICE CONTROL FOUR - 0x003d: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x003e: 0x009e, # CONTROL - 0x003f: 0x001a, # SUBSTITUTE - 0x0040: 0x0020, # SPACE - 0x0041: 0x00a0, # NO-BREAK SPACE - 0x0042: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0043: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0044: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0045: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x0046: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x0047: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0048: 0x007b, # LEFT CURLY BRACKET - 0x0049: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x004a: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x004b: 0x002e, # FULL STOP - 0x004c: 0x003c, # LESS-THAN SIGN - 0x004d: 0x0028, # LEFT PARENTHESIS - 0x004e: 0x002b, # PLUS SIGN - 0x004f: 0x0021, # EXCLAMATION MARK - 0x0050: 0x0026, # AMPERSAND - 0x0051: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0052: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0053: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x0054: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x0055: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x0056: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x0057: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x0058: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x0059: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x005a: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE - 0x005b: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x005c: 0x002a, # ASTERISK - 0x005d: 0x0029, # RIGHT PARENTHESIS - 0x005e: 0x003b, # SEMICOLON - 0x005f: 0x005e, # CIRCUMFLEX ACCENT - 0x0060: 0x002d, # HYPHEN-MINUS - 0x0061: 0x002f, # SOLIDUS - 0x0062: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x0063: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x0064: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x0065: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x0066: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x0067: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0068: 0x005b, # LEFT SQUARE BRACKET - 0x0069: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x006a: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA - 0x006b: 0x002c, # COMMA - 0x006c: 0x0025, # PERCENT SIGN - 0x006d: 0x005f, # LOW LINE - 0x006e: 0x003e, # GREATER-THAN SIGN - 0x006f: 0x003f, # QUESTION MARK - 0x0070: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x0071: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0072: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x0073: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x0074: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x0075: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x0076: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x0077: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x0078: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x0079: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x007a: 0x003a, # COLON - 0x007b: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x007c: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x007d: 0x0027, # APOSTROPHE - 0x007e: 0x003d, # EQUALS SIGN - 0x007f: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x0080: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x0081: 0x0061, # LATIN SMALL LETTER A - 0x0082: 0x0062, # LATIN SMALL LETTER B - 0x0083: 0x0063, # LATIN SMALL LETTER C - 0x0084: 0x0064, # LATIN SMALL LETTER D - 0x0085: 0x0065, # LATIN SMALL LETTER E - 0x0086: 0x0066, # LATIN SMALL LETTER F - 0x0087: 0x0067, # LATIN SMALL LETTER G - 0x0088: 0x0068, # LATIN SMALL LETTER H - 0x0089: 0x0069, # LATIN SMALL LETTER I - 0x008a: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x008b: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x008c: 0x007d, # RIGHT CURLY BRACKET - 0x008d: 0x0060, # GRAVE ACCENT - 0x008e: 0x00a6, # BROKEN BAR - 0x008f: 0x00b1, # PLUS-MINUS SIGN - 0x0090: 0x00b0, # DEGREE SIGN - 0x0091: 0x006a, # LATIN SMALL LETTER J - 0x0092: 0x006b, # LATIN SMALL LETTER K - 0x0093: 0x006c, # LATIN SMALL LETTER L - 0x0094: 0x006d, # LATIN SMALL LETTER M - 0x0095: 0x006e, # LATIN SMALL LETTER N - 0x0096: 0x006f, # LATIN SMALL LETTER O - 0x0097: 0x0070, # LATIN SMALL LETTER P - 0x0098: 0x0071, # LATIN SMALL LETTER Q - 0x0099: 0x0072, # LATIN SMALL LETTER R - 0x009a: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x009b: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x009c: 0x00e6, # LATIN SMALL LIGATURE AE - 0x009d: 0x00b8, # CEDILLA - 0x009e: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x009f: 0x00a4, # CURRENCY SIGN - 0x00a0: 0x00b5, # MICRO SIGN - 0x00a1: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00a2: 0x0073, # LATIN SMALL LETTER S - 0x00a3: 0x0074, # LATIN SMALL LETTER T - 0x00a4: 0x0075, # LATIN SMALL LETTER U - 0x00a5: 0x0076, # LATIN SMALL LETTER V - 0x00a6: 0x0077, # LATIN SMALL LETTER W - 0x00a7: 0x0078, # LATIN SMALL LETTER X - 0x00a8: 0x0079, # LATIN SMALL LETTER Y - 0x00a9: 0x007a, # LATIN SMALL LETTER Z - 0x00aa: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ab: 0x00bf, # INVERTED QUESTION MARK - 0x00ac: 0x005d, # RIGHT SQUARE BRACKET - 0x00ad: 0x0024, # DOLLAR SIGN - 0x00ae: 0x0040, # COMMERCIAL AT - 0x00af: 0x00ae, # REGISTERED SIGN - 0x00b0: 0x00a2, # CENT SIGN - 0x00b1: 0x00a3, # POUND SIGN - 0x00b2: 0x00a5, # YEN SIGN - 0x00b3: 0x00b7, # MIDDLE DOT - 0x00b4: 0x00a9, # COPYRIGHT SIGN - 0x00b5: 0x00a7, # SECTION SIGN - 0x00b7: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00b8: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00b9: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00ba: 0x00ac, # NOT SIGN - 0x00bb: 0x007c, # VERTICAL LINE - 0x00bc: 0x00af, # MACRON - 0x00bd: 0x00a8, # DIAERESIS - 0x00be: 0x00b4, # ACUTE ACCENT - 0x00bf: 0x00d7, # MULTIPLICATION SIGN - 0x00c0: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00c1: 0x0041, # LATIN CAPITAL LETTER A - 0x00c2: 0x0042, # LATIN CAPITAL LETTER B - 0x00c3: 0x0043, # LATIN CAPITAL LETTER C - 0x00c4: 0x0044, # LATIN CAPITAL LETTER D - 0x00c5: 0x0045, # LATIN CAPITAL LETTER E - 0x00c6: 0x0046, # LATIN CAPITAL LETTER F - 0x00c7: 0x0047, # LATIN CAPITAL LETTER G - 0x00c8: 0x0048, # LATIN CAPITAL LETTER H - 0x00c9: 0x0049, # LATIN CAPITAL LETTER I - 0x00ca: 0x00ad, # SOFT HYPHEN - 0x00cb: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00cc: 0x007e, # TILDE - 0x00cd: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x00ce: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00cf: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00d0: 0x011f, # LATIN SMALL LETTER G WITH BREVE - 0x00d1: 0x004a, # LATIN CAPITAL LETTER J - 0x00d2: 0x004b, # LATIN CAPITAL LETTER K - 0x00d3: 0x004c, # LATIN CAPITAL LETTER L - 0x00d4: 0x004d, # LATIN CAPITAL LETTER M - 0x00d5: 0x004e, # LATIN CAPITAL LETTER N - 0x00d6: 0x004f, # LATIN CAPITAL LETTER O - 0x00d7: 0x0050, # LATIN CAPITAL LETTER P - 0x00d8: 0x0051, # LATIN CAPITAL LETTER Q - 0x00d9: 0x0052, # LATIN CAPITAL LETTER R - 0x00da: 0x00b9, # SUPERSCRIPT ONE - 0x00db: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00dc: 0x005c, # REVERSE SOLIDUS - 0x00dd: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x00de: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00df: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x00e0: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00e1: 0x00f7, # DIVISION SIGN - 0x00e2: 0x0053, # LATIN CAPITAL LETTER S - 0x00e3: 0x0054, # LATIN CAPITAL LETTER T - 0x00e4: 0x0055, # LATIN CAPITAL LETTER U - 0x00e5: 0x0056, # LATIN CAPITAL LETTER V - 0x00e6: 0x0057, # LATIN CAPITAL LETTER W - 0x00e7: 0x0058, # LATIN CAPITAL LETTER X - 0x00e8: 0x0059, # LATIN CAPITAL LETTER Y - 0x00e9: 0x005a, # LATIN CAPITAL LETTER Z - 0x00ea: 0x00b2, # SUPERSCRIPT TWO - 0x00eb: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00ec: 0x0023, # NUMBER SIGN - 0x00ed: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00ef: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00f0: 0x0030, # DIGIT ZERO - 0x00f1: 0x0031, # DIGIT ONE - 0x00f2: 0x0032, # DIGIT TWO - 0x00f3: 0x0033, # DIGIT THREE - 0x00f4: 0x0034, # DIGIT FOUR - 0x00f5: 0x0035, # DIGIT FIVE - 0x00f6: 0x0036, # DIGIT SIX - 0x00f7: 0x0037, # DIGIT SEVEN - 0x00f8: 0x0038, # DIGIT EIGHT - 0x00f9: 0x0039, # DIGIT NINE - 0x00fa: 0x00b3, # SUPERSCRIPT THREE - 0x00fb: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x0022, # QUOTATION MARK - 0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ff: 0x009f, # CONTROL + 0x0004: 0x009c, # CONTROL + 0x0005: 0x0009, # HORIZONTAL TABULATION + 0x0006: 0x0086, # CONTROL + 0x0007: 0x007f, # DELETE + 0x0008: 0x0097, # CONTROL + 0x0009: 0x008d, # CONTROL + 0x000a: 0x008e, # CONTROL + 0x0014: 0x009d, # CONTROL + 0x0015: 0x0085, # CONTROL + 0x0016: 0x0008, # BACKSPACE + 0x0017: 0x0087, # CONTROL + 0x001a: 0x0092, # CONTROL + 0x001b: 0x008f, # CONTROL + 0x0020: 0x0080, # CONTROL + 0x0021: 0x0081, # CONTROL + 0x0022: 0x0082, # CONTROL + 0x0023: 0x0083, # CONTROL + 0x0024: 0x0084, # CONTROL + 0x0025: 0x000a, # LINE FEED + 0x0026: 0x0017, # END OF TRANSMISSION BLOCK + 0x0027: 0x001b, # ESCAPE + 0x0028: 0x0088, # CONTROL + 0x0029: 0x0089, # CONTROL + 0x002a: 0x008a, # CONTROL + 0x002b: 0x008b, # CONTROL + 0x002c: 0x008c, # CONTROL + 0x002d: 0x0005, # ENQUIRY + 0x002e: 0x0006, # ACKNOWLEDGE + 0x002f: 0x0007, # BELL + 0x0030: 0x0090, # CONTROL + 0x0031: 0x0091, # CONTROL + 0x0032: 0x0016, # SYNCHRONOUS IDLE + 0x0033: 0x0093, # CONTROL + 0x0034: 0x0094, # CONTROL + 0x0035: 0x0095, # CONTROL + 0x0036: 0x0096, # CONTROL + 0x0037: 0x0004, # END OF TRANSMISSION + 0x0038: 0x0098, # CONTROL + 0x0039: 0x0099, # CONTROL + 0x003a: 0x009a, # CONTROL + 0x003b: 0x009b, # CONTROL + 0x003c: 0x0014, # DEVICE CONTROL FOUR + 0x003d: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x003e: 0x009e, # CONTROL + 0x003f: 0x001a, # SUBSTITUTE + 0x0040: 0x0020, # SPACE + 0x0041: 0x00a0, # NO-BREAK SPACE + 0x0042: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0043: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0044: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0045: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x0046: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x0047: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0048: 0x007b, # LEFT CURLY BRACKET + 0x0049: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x004a: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x004b: 0x002e, # FULL STOP + 0x004c: 0x003c, # LESS-THAN SIGN + 0x004d: 0x0028, # LEFT PARENTHESIS + 0x004e: 0x002b, # PLUS SIGN + 0x004f: 0x0021, # EXCLAMATION MARK + 0x0050: 0x0026, # AMPERSAND + 0x0051: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0052: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0053: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x0054: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x0055: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x0056: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x0057: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x0058: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x0059: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x005a: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE + 0x005b: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x005c: 0x002a, # ASTERISK + 0x005d: 0x0029, # RIGHT PARENTHESIS + 0x005e: 0x003b, # SEMICOLON + 0x005f: 0x005e, # CIRCUMFLEX ACCENT + 0x0060: 0x002d, # HYPHEN-MINUS + 0x0061: 0x002f, # SOLIDUS + 0x0062: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x0063: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x0064: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x0065: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x0066: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x0067: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0068: 0x005b, # LEFT SQUARE BRACKET + 0x0069: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x006a: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x006b: 0x002c, # COMMA + 0x006c: 0x0025, # PERCENT SIGN + 0x006d: 0x005f, # LOW LINE + 0x006e: 0x003e, # GREATER-THAN SIGN + 0x006f: 0x003f, # QUESTION MARK + 0x0070: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x0071: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0072: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x0073: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x0074: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x0075: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x0076: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x0077: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x0078: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x0079: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x007a: 0x003a, # COLON + 0x007b: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x007c: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x007d: 0x0027, # APOSTROPHE + 0x007e: 0x003d, # EQUALS SIGN + 0x007f: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x0080: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x0081: 0x0061, # LATIN SMALL LETTER A + 0x0082: 0x0062, # LATIN SMALL LETTER B + 0x0083: 0x0063, # LATIN SMALL LETTER C + 0x0084: 0x0064, # LATIN SMALL LETTER D + 0x0085: 0x0065, # LATIN SMALL LETTER E + 0x0086: 0x0066, # LATIN SMALL LETTER F + 0x0087: 0x0067, # LATIN SMALL LETTER G + 0x0088: 0x0068, # LATIN SMALL LETTER H + 0x0089: 0x0069, # LATIN SMALL LETTER I + 0x008a: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x008b: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x008c: 0x007d, # RIGHT CURLY BRACKET + 0x008d: 0x0060, # GRAVE ACCENT + 0x008e: 0x00a6, # BROKEN BAR + 0x008f: 0x00b1, # PLUS-MINUS SIGN + 0x0090: 0x00b0, # DEGREE SIGN + 0x0091: 0x006a, # LATIN SMALL LETTER J + 0x0092: 0x006b, # LATIN SMALL LETTER K + 0x0093: 0x006c, # LATIN SMALL LETTER L + 0x0094: 0x006d, # LATIN SMALL LETTER M + 0x0095: 0x006e, # LATIN SMALL LETTER N + 0x0096: 0x006f, # LATIN SMALL LETTER O + 0x0097: 0x0070, # LATIN SMALL LETTER P + 0x0098: 0x0071, # LATIN SMALL LETTER Q + 0x0099: 0x0072, # LATIN SMALL LETTER R + 0x009a: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x009b: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x009c: 0x00e6, # LATIN SMALL LIGATURE AE + 0x009d: 0x00b8, # CEDILLA + 0x009e: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x009f: 0x00a4, # CURRENCY SIGN + 0x00a0: 0x00b5, # MICRO SIGN + 0x00a1: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00a2: 0x0073, # LATIN SMALL LETTER S + 0x00a3: 0x0074, # LATIN SMALL LETTER T + 0x00a4: 0x0075, # LATIN SMALL LETTER U + 0x00a5: 0x0076, # LATIN SMALL LETTER V + 0x00a6: 0x0077, # LATIN SMALL LETTER W + 0x00a7: 0x0078, # LATIN SMALL LETTER X + 0x00a8: 0x0079, # LATIN SMALL LETTER Y + 0x00a9: 0x007a, # LATIN SMALL LETTER Z + 0x00aa: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ab: 0x00bf, # INVERTED QUESTION MARK + 0x00ac: 0x005d, # RIGHT SQUARE BRACKET + 0x00ad: 0x0024, # DOLLAR SIGN + 0x00ae: 0x0040, # COMMERCIAL AT + 0x00af: 0x00ae, # REGISTERED SIGN + 0x00b0: 0x00a2, # CENT SIGN + 0x00b1: 0x00a3, # POUND SIGN + 0x00b2: 0x00a5, # YEN SIGN + 0x00b3: 0x00b7, # MIDDLE DOT + 0x00b4: 0x00a9, # COPYRIGHT SIGN + 0x00b5: 0x00a7, # SECTION SIGN + 0x00b7: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00b8: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00b9: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00ba: 0x00ac, # NOT SIGN + 0x00bb: 0x007c, # VERTICAL LINE + 0x00bc: 0x00af, # MACRON + 0x00bd: 0x00a8, # DIAERESIS + 0x00be: 0x00b4, # ACUTE ACCENT + 0x00bf: 0x00d7, # MULTIPLICATION SIGN + 0x00c0: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00c1: 0x0041, # LATIN CAPITAL LETTER A + 0x00c2: 0x0042, # LATIN CAPITAL LETTER B + 0x00c3: 0x0043, # LATIN CAPITAL LETTER C + 0x00c4: 0x0044, # LATIN CAPITAL LETTER D + 0x00c5: 0x0045, # LATIN CAPITAL LETTER E + 0x00c6: 0x0046, # LATIN CAPITAL LETTER F + 0x00c7: 0x0047, # LATIN CAPITAL LETTER G + 0x00c8: 0x0048, # LATIN CAPITAL LETTER H + 0x00c9: 0x0049, # LATIN CAPITAL LETTER I + 0x00ca: 0x00ad, # SOFT HYPHEN + 0x00cb: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00cc: 0x007e, # TILDE + 0x00cd: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x00ce: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00cf: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00d0: 0x011f, # LATIN SMALL LETTER G WITH BREVE + 0x00d1: 0x004a, # LATIN CAPITAL LETTER J + 0x00d2: 0x004b, # LATIN CAPITAL LETTER K + 0x00d3: 0x004c, # LATIN CAPITAL LETTER L + 0x00d4: 0x004d, # LATIN CAPITAL LETTER M + 0x00d5: 0x004e, # LATIN CAPITAL LETTER N + 0x00d6: 0x004f, # LATIN CAPITAL LETTER O + 0x00d7: 0x0050, # LATIN CAPITAL LETTER P + 0x00d8: 0x0051, # LATIN CAPITAL LETTER Q + 0x00d9: 0x0052, # LATIN CAPITAL LETTER R + 0x00da: 0x00b9, # SUPERSCRIPT ONE + 0x00db: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x005c, # REVERSE SOLIDUS + 0x00dd: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x00de: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00df: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x00e0: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00e1: 0x00f7, # DIVISION SIGN + 0x00e2: 0x0053, # LATIN CAPITAL LETTER S + 0x00e3: 0x0054, # LATIN CAPITAL LETTER T + 0x00e4: 0x0055, # LATIN CAPITAL LETTER U + 0x00e5: 0x0056, # LATIN CAPITAL LETTER V + 0x00e6: 0x0057, # LATIN CAPITAL LETTER W + 0x00e7: 0x0058, # LATIN CAPITAL LETTER X + 0x00e8: 0x0059, # LATIN CAPITAL LETTER Y + 0x00e9: 0x005a, # LATIN CAPITAL LETTER Z + 0x00ea: 0x00b2, # SUPERSCRIPT TWO + 0x00eb: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00ec: 0x0023, # NUMBER SIGN + 0x00ed: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00ef: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00f0: 0x0030, # DIGIT ZERO + 0x00f1: 0x0031, # DIGIT ONE + 0x00f2: 0x0032, # DIGIT TWO + 0x00f3: 0x0033, # DIGIT THREE + 0x00f4: 0x0034, # DIGIT FOUR + 0x00f5: 0x0035, # DIGIT FIVE + 0x00f6: 0x0036, # DIGIT SIX + 0x00f7: 0x0037, # DIGIT SEVEN + 0x00f8: 0x0038, # DIGIT EIGHT + 0x00f9: 0x0039, # DIGIT NINE + 0x00fa: 0x00b3, # SUPERSCRIPT THREE + 0x00fb: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0022, # QUOTATION MARK + 0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00ff: 0x009f, # CONTROL }) ### Encoding Map diff --git a/Lib/encodings/cp1140.py b/Lib/encodings/cp1140.py index 68f64ab..5fd31a6 100644 --- a/Lib/encodings/cp1140.py +++ b/Lib/encodings/cp1140.py @@ -14,14 +14,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -36,10 +36,9 @@ def getregentry(): decoding_map = copy.copy(cp037.decoding_map) decoding_map.update({ - 0x009f: 0x20ac # EURO SIGN + 0x009f: 0x20ac # EURO SIGN }) ### Encoding Map encoding_map = codecs.make_encoding_map(decoding_map) - diff --git a/Lib/encodings/cp1250.py b/Lib/encodings/cp1250.py index 39cbc33..85774ed 100644 --- a/Lib/encodings/cp1250.py +++ b/Lib/encodings/cp1250.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,85 +37,85 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: None, # UNDEFINED - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: None, # UNDEFINED - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: None, # UNDEFINED - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x008d: 0x0164, # LATIN CAPITAL LETTER T WITH CARON - 0x008e: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x008f: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x0090: None, # UNDEFINED - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: None, # UNDEFINED - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x009d: 0x0165, # LATIN SMALL LETTER T WITH CARON - 0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x009f: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00a1: 0x02c7, # CARON - 0x00a2: 0x02d8, # BREVE - 0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x00a5: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00b2: 0x02db, # OGONEK - 0x00b3: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x00b9: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA - 0x00bc: 0x013d, # LATIN CAPITAL LETTER L WITH CARON - 0x00bd: 0x02dd, # DOUBLE ACUTE ACCENT - 0x00be: 0x013e, # LATIN SMALL LETTER L WITH CARON - 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00c0: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE - 0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE - 0x00c5: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE - 0x00c6: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00cc: 0x011a, # LATIN CAPITAL LETTER E WITH CARON - 0x00cf: 0x010e, # LATIN CAPITAL LETTER D WITH CARON - 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE - 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00d2: 0x0147, # LATIN CAPITAL LETTER N WITH CARON - 0x00d5: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x00d8: 0x0158, # LATIN CAPITAL LETTER R WITH CARON - 0x00d9: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x00db: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x00de: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA - 0x00e0: 0x0155, # LATIN SMALL LETTER R WITH ACUTE - 0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE - 0x00e5: 0x013a, # LATIN SMALL LETTER L WITH ACUTE - 0x00e6: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00ec: 0x011b, # LATIN SMALL LETTER E WITH CARON - 0x00ef: 0x010f, # LATIN SMALL LETTER D WITH CARON - 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE - 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00f2: 0x0148, # LATIN SMALL LETTER N WITH CARON - 0x00f5: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x00f8: 0x0159, # LATIN SMALL LETTER R WITH CARON - 0x00f9: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE - 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA - 0x00ff: 0x02d9, # DOT ABOVE + 0x0080: 0x20ac, # EURO SIGN + 0x0081: None, # UNDEFINED + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: None, # UNDEFINED + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: None, # UNDEFINED + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x008d: 0x0164, # LATIN CAPITAL LETTER T WITH CARON + 0x008e: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x008f: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x0090: None, # UNDEFINED + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: None, # UNDEFINED + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x009d: 0x0165, # LATIN SMALL LETTER T WITH CARON + 0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x009f: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x00a1: 0x02c7, # CARON + 0x00a2: 0x02d8, # BREVE + 0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE + 0x00a5: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00b2: 0x02db, # OGONEK + 0x00b3: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x00b9: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00bc: 0x013d, # LATIN CAPITAL LETTER L WITH CARON + 0x00bd: 0x02dd, # DOUBLE ACUTE ACCENT + 0x00be: 0x013e, # LATIN SMALL LETTER L WITH CARON + 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00c0: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE + 0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE + 0x00c5: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE + 0x00c6: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00cc: 0x011a, # LATIN CAPITAL LETTER E WITH CARON + 0x00cf: 0x010e, # LATIN CAPITAL LETTER D WITH CARON + 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE + 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00d2: 0x0147, # LATIN CAPITAL LETTER N WITH CARON + 0x00d5: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x00d8: 0x0158, # LATIN CAPITAL LETTER R WITH CARON + 0x00d9: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x00db: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x00de: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA + 0x00e0: 0x0155, # LATIN SMALL LETTER R WITH ACUTE + 0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE + 0x00e5: 0x013a, # LATIN SMALL LETTER L WITH ACUTE + 0x00e6: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00ec: 0x011b, # LATIN SMALL LETTER E WITH CARON + 0x00ef: 0x010f, # LATIN SMALL LETTER D WITH CARON + 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE + 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00f2: 0x0148, # LATIN SMALL LETTER N WITH CARON + 0x00f5: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x00f8: 0x0159, # LATIN SMALL LETTER R WITH CARON + 0x00f9: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE + 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA + 0x00ff: 0x02d9, # DOT ABOVE }) ### Encoding Map diff --git a/Lib/encodings/cp1251.py b/Lib/encodings/cp1251.py index 4d4b6ee..f191b06 100644 --- a/Lib/encodings/cp1251.py +++ b/Lib/encodings/cp1251.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,119 +37,119 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x0402, # CYRILLIC CAPITAL LETTER DJE - 0x0081: 0x0403, # CYRILLIC CAPITAL LETTER GJE - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: 0x0453, # CYRILLIC SMALL LETTER GJE - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: 0x20ac, # EURO SIGN - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: 0x0409, # CYRILLIC CAPITAL LETTER LJE - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: 0x040a, # CYRILLIC CAPITAL LETTER NJE - 0x008d: 0x040c, # CYRILLIC CAPITAL LETTER KJE - 0x008e: 0x040b, # CYRILLIC CAPITAL LETTER TSHE - 0x008f: 0x040f, # CYRILLIC CAPITAL LETTER DZHE - 0x0090: 0x0452, # CYRILLIC SMALL LETTER DJE - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: None, # UNDEFINED - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: 0x0459, # CYRILLIC SMALL LETTER LJE - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: 0x045a, # CYRILLIC SMALL LETTER NJE - 0x009d: 0x045c, # CYRILLIC SMALL LETTER KJE - 0x009e: 0x045b, # CYRILLIC SMALL LETTER TSHE - 0x009f: 0x045f, # CYRILLIC SMALL LETTER DZHE - 0x00a1: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U - 0x00a2: 0x045e, # CYRILLIC SMALL LETTER SHORT U - 0x00a3: 0x0408, # CYRILLIC CAPITAL LETTER JE - 0x00a5: 0x0490, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN - 0x00a8: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x00aa: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x00af: 0x0407, # CYRILLIC CAPITAL LETTER YI - 0x00b2: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x00b3: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x00b4: 0x0491, # CYRILLIC SMALL LETTER GHE WITH UPTURN - 0x00b8: 0x0451, # CYRILLIC SMALL LETTER IO - 0x00b9: 0x2116, # NUMERO SIGN - 0x00ba: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x00bc: 0x0458, # CYRILLIC SMALL LETTER JE - 0x00bd: 0x0405, # CYRILLIC CAPITAL LETTER DZE - 0x00be: 0x0455, # CYRILLIC SMALL LETTER DZE - 0x00bf: 0x0457, # CYRILLIC SMALL LETTER YI - 0x00c0: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x00c1: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x00c2: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x00c3: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x00c4: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x00c5: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x00c6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x00c7: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x00c8: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x00c9: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x00ca: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x00cb: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x00cc: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x00cd: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x00ce: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x00cf: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x00d0: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x00d1: 0x0421, # CYRILLIC CAPITAL LETTER ES - 0x00d2: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x00d3: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x00d4: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x00d5: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x00d6: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x00d7: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x00d8: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x00d9: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x00da: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x00db: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x00dc: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x00dd: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x00de: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x00df: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00e0: 0x0430, # CYRILLIC SMALL LETTER A - 0x00e1: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00e2: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00e3: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00e4: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00e5: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00e6: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00e7: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00e8: 0x0438, # CYRILLIC SMALL LETTER I - 0x00e9: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00ea: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00eb: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00ec: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00ed: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00ee: 0x043e, # CYRILLIC SMALL LETTER O - 0x00ef: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00f0: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00f1: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00f2: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00f3: 0x0443, # CYRILLIC SMALL LETTER U - 0x00f4: 0x0444, # CYRILLIC SMALL LETTER EF - 0x00f5: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00f6: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00f7: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00f8: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00fa: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x00fb: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00fc: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00fd: 0x044d, # CYRILLIC SMALL LETTER E - 0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU - 0x00ff: 0x044f, # CYRILLIC SMALL LETTER YA + 0x0080: 0x0402, # CYRILLIC CAPITAL LETTER DJE + 0x0081: 0x0403, # CYRILLIC CAPITAL LETTER GJE + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: 0x0453, # CYRILLIC SMALL LETTER GJE + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: 0x20ac, # EURO SIGN + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: 0x0409, # CYRILLIC CAPITAL LETTER LJE + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: 0x040a, # CYRILLIC CAPITAL LETTER NJE + 0x008d: 0x040c, # CYRILLIC CAPITAL LETTER KJE + 0x008e: 0x040b, # CYRILLIC CAPITAL LETTER TSHE + 0x008f: 0x040f, # CYRILLIC CAPITAL LETTER DZHE + 0x0090: 0x0452, # CYRILLIC SMALL LETTER DJE + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: None, # UNDEFINED + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: 0x0459, # CYRILLIC SMALL LETTER LJE + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: 0x045a, # CYRILLIC SMALL LETTER NJE + 0x009d: 0x045c, # CYRILLIC SMALL LETTER KJE + 0x009e: 0x045b, # CYRILLIC SMALL LETTER TSHE + 0x009f: 0x045f, # CYRILLIC SMALL LETTER DZHE + 0x00a1: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U + 0x00a2: 0x045e, # CYRILLIC SMALL LETTER SHORT U + 0x00a3: 0x0408, # CYRILLIC CAPITAL LETTER JE + 0x00a5: 0x0490, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN + 0x00a8: 0x0401, # CYRILLIC CAPITAL LETTER IO + 0x00aa: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x00af: 0x0407, # CYRILLIC CAPITAL LETTER YI + 0x00b2: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x00b3: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x00b4: 0x0491, # CYRILLIC SMALL LETTER GHE WITH UPTURN + 0x00b8: 0x0451, # CYRILLIC SMALL LETTER IO + 0x00b9: 0x2116, # NUMERO SIGN + 0x00ba: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x00bc: 0x0458, # CYRILLIC SMALL LETTER JE + 0x00bd: 0x0405, # CYRILLIC CAPITAL LETTER DZE + 0x00be: 0x0455, # CYRILLIC SMALL LETTER DZE + 0x00bf: 0x0457, # CYRILLIC SMALL LETTER YI + 0x00c0: 0x0410, # CYRILLIC CAPITAL LETTER A + 0x00c1: 0x0411, # CYRILLIC CAPITAL LETTER BE + 0x00c2: 0x0412, # CYRILLIC CAPITAL LETTER VE + 0x00c3: 0x0413, # CYRILLIC CAPITAL LETTER GHE + 0x00c4: 0x0414, # CYRILLIC CAPITAL LETTER DE + 0x00c5: 0x0415, # CYRILLIC CAPITAL LETTER IE + 0x00c6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE + 0x00c7: 0x0417, # CYRILLIC CAPITAL LETTER ZE + 0x00c8: 0x0418, # CYRILLIC CAPITAL LETTER I + 0x00c9: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I + 0x00ca: 0x041a, # CYRILLIC CAPITAL LETTER KA + 0x00cb: 0x041b, # CYRILLIC CAPITAL LETTER EL + 0x00cc: 0x041c, # CYRILLIC CAPITAL LETTER EM + 0x00cd: 0x041d, # CYRILLIC CAPITAL LETTER EN + 0x00ce: 0x041e, # CYRILLIC CAPITAL LETTER O + 0x00cf: 0x041f, # CYRILLIC CAPITAL LETTER PE + 0x00d0: 0x0420, # CYRILLIC CAPITAL LETTER ER + 0x00d1: 0x0421, # CYRILLIC CAPITAL LETTER ES + 0x00d2: 0x0422, # CYRILLIC CAPITAL LETTER TE + 0x00d3: 0x0423, # CYRILLIC CAPITAL LETTER U + 0x00d4: 0x0424, # CYRILLIC CAPITAL LETTER EF + 0x00d5: 0x0425, # CYRILLIC CAPITAL LETTER HA + 0x00d6: 0x0426, # CYRILLIC CAPITAL LETTER TSE + 0x00d7: 0x0427, # CYRILLIC CAPITAL LETTER CHE + 0x00d8: 0x0428, # CYRILLIC CAPITAL LETTER SHA + 0x00d9: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA + 0x00da: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x00db: 0x042b, # CYRILLIC CAPITAL LETTER YERU + 0x00dc: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x00dd: 0x042d, # CYRILLIC CAPITAL LETTER E + 0x00de: 0x042e, # CYRILLIC CAPITAL LETTER YU + 0x00df: 0x042f, # CYRILLIC CAPITAL LETTER YA + 0x00e0: 0x0430, # CYRILLIC SMALL LETTER A + 0x00e1: 0x0431, # CYRILLIC SMALL LETTER BE + 0x00e2: 0x0432, # CYRILLIC SMALL LETTER VE + 0x00e3: 0x0433, # CYRILLIC SMALL LETTER GHE + 0x00e4: 0x0434, # CYRILLIC SMALL LETTER DE + 0x00e5: 0x0435, # CYRILLIC SMALL LETTER IE + 0x00e6: 0x0436, # CYRILLIC SMALL LETTER ZHE + 0x00e7: 0x0437, # CYRILLIC SMALL LETTER ZE + 0x00e8: 0x0438, # CYRILLIC SMALL LETTER I + 0x00e9: 0x0439, # CYRILLIC SMALL LETTER SHORT I + 0x00ea: 0x043a, # CYRILLIC SMALL LETTER KA + 0x00eb: 0x043b, # CYRILLIC SMALL LETTER EL + 0x00ec: 0x043c, # CYRILLIC SMALL LETTER EM + 0x00ed: 0x043d, # CYRILLIC SMALL LETTER EN + 0x00ee: 0x043e, # CYRILLIC SMALL LETTER O + 0x00ef: 0x043f, # CYRILLIC SMALL LETTER PE + 0x00f0: 0x0440, # CYRILLIC SMALL LETTER ER + 0x00f1: 0x0441, # CYRILLIC SMALL LETTER ES + 0x00f2: 0x0442, # CYRILLIC SMALL LETTER TE + 0x00f3: 0x0443, # CYRILLIC SMALL LETTER U + 0x00f4: 0x0444, # CYRILLIC SMALL LETTER EF + 0x00f5: 0x0445, # CYRILLIC SMALL LETTER HA + 0x00f6: 0x0446, # CYRILLIC SMALL LETTER TSE + 0x00f7: 0x0447, # CYRILLIC SMALL LETTER CHE + 0x00f8: 0x0448, # CYRILLIC SMALL LETTER SHA + 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA + 0x00fa: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN + 0x00fb: 0x044b, # CYRILLIC SMALL LETTER YERU + 0x00fc: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN + 0x00fd: 0x044d, # CYRILLIC SMALL LETTER E + 0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU + 0x00ff: 0x044f, # CYRILLIC SMALL LETTER YA }) ### Encoding Map diff --git a/Lib/encodings/cp1252.py b/Lib/encodings/cp1252.py index 647f907..d1ecaba 100644 --- a/Lib/encodings/cp1252.py +++ b/Lib/encodings/cp1252.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,38 +37,38 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: None, # UNDEFINED - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x008d: None, # UNDEFINED - 0x008e: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x008f: None, # UNDEFINED - 0x0090: None, # UNDEFINED - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: 0x02dc, # SMALL TILDE - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: 0x0153, # LATIN SMALL LIGATURE OE - 0x009d: None, # UNDEFINED - 0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0080: 0x20ac, # EURO SIGN + 0x0081: None, # UNDEFINED + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE + 0x008d: None, # UNDEFINED + 0x008e: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x008f: None, # UNDEFINED + 0x0090: None, # UNDEFINED + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: 0x02dc, # SMALL TILDE + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: 0x0153, # LATIN SMALL LIGATURE OE + 0x009d: None, # UNDEFINED + 0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS }) ### Encoding Map diff --git a/Lib/encodings/cp1253.py b/Lib/encodings/cp1253.py index bfb4723..22c70df 100644 --- a/Lib/encodings/cp1253.py +++ b/Lib/encodings/cp1253.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,113 +37,113 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: None, # UNDEFINED - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: None, # UNDEFINED - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: None, # UNDEFINED - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: None, # UNDEFINED - 0x008d: None, # UNDEFINED - 0x008e: None, # UNDEFINED - 0x008f: None, # UNDEFINED - 0x0090: None, # UNDEFINED - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: None, # UNDEFINED - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: None, # UNDEFINED - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: None, # UNDEFINED - 0x009d: None, # UNDEFINED - 0x009e: None, # UNDEFINED - 0x009f: None, # UNDEFINED - 0x00a1: 0x0385, # GREEK DIALYTIKA TONOS - 0x00a2: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x00aa: None, # UNDEFINED - 0x00af: 0x2015, # HORIZONTAL BAR - 0x00b4: 0x0384, # GREEK TONOS - 0x00b8: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x00b9: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x00ba: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x00bc: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x00be: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x00bf: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x00c0: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x00c1: 0x0391, # GREEK CAPITAL LETTER ALPHA - 0x00c2: 0x0392, # GREEK CAPITAL LETTER BETA - 0x00c3: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00c4: 0x0394, # GREEK CAPITAL LETTER DELTA - 0x00c5: 0x0395, # GREEK CAPITAL LETTER EPSILON - 0x00c6: 0x0396, # GREEK CAPITAL LETTER ZETA - 0x00c7: 0x0397, # GREEK CAPITAL LETTER ETA - 0x00c8: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00c9: 0x0399, # GREEK CAPITAL LETTER IOTA - 0x00ca: 0x039a, # GREEK CAPITAL LETTER KAPPA - 0x00cb: 0x039b, # GREEK CAPITAL LETTER LAMDA - 0x00cc: 0x039c, # GREEK CAPITAL LETTER MU - 0x00cd: 0x039d, # GREEK CAPITAL LETTER NU - 0x00ce: 0x039e, # GREEK CAPITAL LETTER XI - 0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON - 0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI - 0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO - 0x00d2: None, # UNDEFINED - 0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU - 0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON - 0x00d6: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00d7: 0x03a7, # GREEK CAPITAL LETTER CHI - 0x00d8: 0x03a8, # GREEK CAPITAL LETTER PSI - 0x00d9: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00da: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x00db: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x00dc: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x00dd: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x00de: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS - 0x00df: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS - 0x00e0: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e2: 0x03b2, # GREEK SMALL LETTER BETA - 0x00e3: 0x03b3, # GREEK SMALL LETTER GAMMA - 0x00e4: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00e6: 0x03b6, # GREEK SMALL LETTER ZETA - 0x00e7: 0x03b7, # GREEK SMALL LETTER ETA - 0x00e8: 0x03b8, # GREEK SMALL LETTER THETA - 0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA - 0x00ea: 0x03ba, # GREEK SMALL LETTER KAPPA - 0x00eb: 0x03bb, # GREEK SMALL LETTER LAMDA - 0x00ec: 0x03bc, # GREEK SMALL LETTER MU - 0x00ed: 0x03bd, # GREEK SMALL LETTER NU - 0x00ee: 0x03be, # GREEK SMALL LETTER XI - 0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON - 0x00f0: 0x03c0, # GREEK SMALL LETTER PI - 0x00f1: 0x03c1, # GREEK SMALL LETTER RHO - 0x00f2: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA - 0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00f4: 0x03c4, # GREEK SMALL LETTER TAU - 0x00f5: 0x03c5, # GREEK SMALL LETTER UPSILON - 0x00f6: 0x03c6, # GREEK SMALL LETTER PHI - 0x00f7: 0x03c7, # GREEK SMALL LETTER CHI - 0x00f8: 0x03c8, # GREEK SMALL LETTER PSI - 0x00f9: 0x03c9, # GREEK SMALL LETTER OMEGA - 0x00fa: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x00ff: None, # UNDEFINED + 0x0080: 0x20ac, # EURO SIGN + 0x0081: None, # UNDEFINED + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: None, # UNDEFINED + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: None, # UNDEFINED + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: None, # UNDEFINED + 0x008d: None, # UNDEFINED + 0x008e: None, # UNDEFINED + 0x008f: None, # UNDEFINED + 0x0090: None, # UNDEFINED + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: None, # UNDEFINED + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: None, # UNDEFINED + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: None, # UNDEFINED + 0x009d: None, # UNDEFINED + 0x009e: None, # UNDEFINED + 0x009f: None, # UNDEFINED + 0x00a1: 0x0385, # GREEK DIALYTIKA TONOS + 0x00a2: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x00aa: None, # UNDEFINED + 0x00af: 0x2015, # HORIZONTAL BAR + 0x00b4: 0x0384, # GREEK TONOS + 0x00b8: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x00b9: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x00ba: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x00bc: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x00be: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x00bf: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x00c0: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x00c1: 0x0391, # GREEK CAPITAL LETTER ALPHA + 0x00c2: 0x0392, # GREEK CAPITAL LETTER BETA + 0x00c3: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00c4: 0x0394, # GREEK CAPITAL LETTER DELTA + 0x00c5: 0x0395, # GREEK CAPITAL LETTER EPSILON + 0x00c6: 0x0396, # GREEK CAPITAL LETTER ZETA + 0x00c7: 0x0397, # GREEK CAPITAL LETTER ETA + 0x00c8: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00c9: 0x0399, # GREEK CAPITAL LETTER IOTA + 0x00ca: 0x039a, # GREEK CAPITAL LETTER KAPPA + 0x00cb: 0x039b, # GREEK CAPITAL LETTER LAMDA + 0x00cc: 0x039c, # GREEK CAPITAL LETTER MU + 0x00cd: 0x039d, # GREEK CAPITAL LETTER NU + 0x00ce: 0x039e, # GREEK CAPITAL LETTER XI + 0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON + 0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI + 0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO + 0x00d2: None, # UNDEFINED + 0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU + 0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON + 0x00d6: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00d7: 0x03a7, # GREEK CAPITAL LETTER CHI + 0x00d8: 0x03a8, # GREEK CAPITAL LETTER PSI + 0x00d9: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00da: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x00db: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x00dc: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x00dd: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x00de: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS + 0x00df: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS + 0x00e0: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e2: 0x03b2, # GREEK SMALL LETTER BETA + 0x00e3: 0x03b3, # GREEK SMALL LETTER GAMMA + 0x00e4: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00e6: 0x03b6, # GREEK SMALL LETTER ZETA + 0x00e7: 0x03b7, # GREEK SMALL LETTER ETA + 0x00e8: 0x03b8, # GREEK SMALL LETTER THETA + 0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA + 0x00ea: 0x03ba, # GREEK SMALL LETTER KAPPA + 0x00eb: 0x03bb, # GREEK SMALL LETTER LAMDA + 0x00ec: 0x03bc, # GREEK SMALL LETTER MU + 0x00ed: 0x03bd, # GREEK SMALL LETTER NU + 0x00ee: 0x03be, # GREEK SMALL LETTER XI + 0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON + 0x00f0: 0x03c0, # GREEK SMALL LETTER PI + 0x00f1: 0x03c1, # GREEK SMALL LETTER RHO + 0x00f2: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA + 0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00f4: 0x03c4, # GREEK SMALL LETTER TAU + 0x00f5: 0x03c5, # GREEK SMALL LETTER UPSILON + 0x00f6: 0x03c6, # GREEK SMALL LETTER PHI + 0x00f7: 0x03c7, # GREEK SMALL LETTER CHI + 0x00f8: 0x03c8, # GREEK SMALL LETTER PSI + 0x00f9: 0x03c9, # GREEK SMALL LETTER OMEGA + 0x00fa: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x00ff: None, # UNDEFINED }) ### Encoding Map diff --git a/Lib/encodings/cp1254.py b/Lib/encodings/cp1254.py index dc662ed..718ed27 100644 --- a/Lib/encodings/cp1254.py +++ b/Lib/encodings/cp1254.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,44 +37,44 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: None, # UNDEFINED - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x008d: None, # UNDEFINED - 0x008e: None, # UNDEFINED - 0x008f: None, # UNDEFINED - 0x0090: None, # UNDEFINED - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: 0x02dc, # SMALL TILDE - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: 0x0153, # LATIN SMALL LIGATURE OE - 0x009d: None, # UNDEFINED - 0x009e: None, # UNDEFINED - 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE - 0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE - 0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x0080: 0x20ac, # EURO SIGN + 0x0081: None, # UNDEFINED + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE + 0x008d: None, # UNDEFINED + 0x008e: None, # UNDEFINED + 0x008f: None, # UNDEFINED + 0x0090: None, # UNDEFINED + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: 0x02dc, # SMALL TILDE + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: 0x0153, # LATIN SMALL LIGATURE OE + 0x009d: None, # UNDEFINED + 0x009e: None, # UNDEFINED + 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE + 0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE + 0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA }) ### Encoding Map diff --git a/Lib/encodings/cp1255.py b/Lib/encodings/cp1255.py index 6f70654..b20f5da 100644 --- a/Lib/encodings/cp1255.py +++ b/Lib/encodings/cp1255.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,105 +37,105 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: None, # UNDEFINED - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: None, # UNDEFINED - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: None, # UNDEFINED - 0x008d: None, # UNDEFINED - 0x008e: None, # UNDEFINED - 0x008f: None, # UNDEFINED - 0x0090: None, # UNDEFINED - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: 0x02dc, # SMALL TILDE - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: None, # UNDEFINED - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: None, # UNDEFINED - 0x009d: None, # UNDEFINED - 0x009e: None, # UNDEFINED - 0x009f: None, # UNDEFINED - 0x00a4: 0x20aa, # NEW SHEQEL SIGN - 0x00aa: 0x00d7, # MULTIPLICATION SIGN - 0x00ba: 0x00f7, # DIVISION SIGN - 0x00c0: 0x05b0, # HEBREW POINT SHEVA - 0x00c1: 0x05b1, # HEBREW POINT HATAF SEGOL - 0x00c2: 0x05b2, # HEBREW POINT HATAF PATAH - 0x00c3: 0x05b3, # HEBREW POINT HATAF QAMATS - 0x00c4: 0x05b4, # HEBREW POINT HIRIQ - 0x00c5: 0x05b5, # HEBREW POINT TSERE - 0x00c6: 0x05b6, # HEBREW POINT SEGOL - 0x00c7: 0x05b7, # HEBREW POINT PATAH - 0x00c8: 0x05b8, # HEBREW POINT QAMATS - 0x00c9: 0x05b9, # HEBREW POINT HOLAM - 0x00ca: None, # UNDEFINED - 0x00cb: 0x05bb, # HEBREW POINT QUBUTS - 0x00cc: 0x05bc, # HEBREW POINT DAGESH OR MAPIQ - 0x00cd: 0x05bd, # HEBREW POINT METEG - 0x00ce: 0x05be, # HEBREW PUNCTUATION MAQAF - 0x00cf: 0x05bf, # HEBREW POINT RAFE - 0x00d0: 0x05c0, # HEBREW PUNCTUATION PASEQ - 0x00d1: 0x05c1, # HEBREW POINT SHIN DOT - 0x00d2: 0x05c2, # HEBREW POINT SIN DOT - 0x00d3: 0x05c3, # HEBREW PUNCTUATION SOF PASUQ - 0x00d4: 0x05f0, # HEBREW LIGATURE YIDDISH DOUBLE VAV - 0x00d5: 0x05f1, # HEBREW LIGATURE YIDDISH VAV YOD - 0x00d6: 0x05f2, # HEBREW LIGATURE YIDDISH DOUBLE YOD - 0x00d7: 0x05f3, # HEBREW PUNCTUATION GERESH - 0x00d8: 0x05f4, # HEBREW PUNCTUATION GERSHAYIM - 0x00d9: None, # UNDEFINED - 0x00da: None, # UNDEFINED - 0x00db: None, # UNDEFINED - 0x00dc: None, # UNDEFINED - 0x00dd: None, # UNDEFINED - 0x00de: None, # UNDEFINED - 0x00df: None, # UNDEFINED - 0x00e0: 0x05d0, # HEBREW LETTER ALEF - 0x00e1: 0x05d1, # HEBREW LETTER BET - 0x00e2: 0x05d2, # HEBREW LETTER GIMEL - 0x00e3: 0x05d3, # HEBREW LETTER DALET - 0x00e4: 0x05d4, # HEBREW LETTER HE - 0x00e5: 0x05d5, # HEBREW LETTER VAV - 0x00e6: 0x05d6, # HEBREW LETTER ZAYIN - 0x00e7: 0x05d7, # HEBREW LETTER HET - 0x00e8: 0x05d8, # HEBREW LETTER TET - 0x00e9: 0x05d9, # HEBREW LETTER YOD - 0x00ea: 0x05da, # HEBREW LETTER FINAL KAF - 0x00eb: 0x05db, # HEBREW LETTER KAF - 0x00ec: 0x05dc, # HEBREW LETTER LAMED - 0x00ed: 0x05dd, # HEBREW LETTER FINAL MEM - 0x00ee: 0x05de, # HEBREW LETTER MEM - 0x00ef: 0x05df, # HEBREW LETTER FINAL NUN - 0x00f0: 0x05e0, # HEBREW LETTER NUN - 0x00f1: 0x05e1, # HEBREW LETTER SAMEKH - 0x00f2: 0x05e2, # HEBREW LETTER AYIN - 0x00f3: 0x05e3, # HEBREW LETTER FINAL PE - 0x00f4: 0x05e4, # HEBREW LETTER PE - 0x00f5: 0x05e5, # HEBREW LETTER FINAL TSADI - 0x00f6: 0x05e6, # HEBREW LETTER TSADI - 0x00f7: 0x05e7, # HEBREW LETTER QOF - 0x00f8: 0x05e8, # HEBREW LETTER RESH - 0x00f9: 0x05e9, # HEBREW LETTER SHIN - 0x00fa: 0x05ea, # HEBREW LETTER TAV - 0x00fb: None, # UNDEFINED - 0x00fc: None, # UNDEFINED - 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK - 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK - 0x00ff: None, # UNDEFINED + 0x0080: 0x20ac, # EURO SIGN + 0x0081: None, # UNDEFINED + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: None, # UNDEFINED + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: None, # UNDEFINED + 0x008d: None, # UNDEFINED + 0x008e: None, # UNDEFINED + 0x008f: None, # UNDEFINED + 0x0090: None, # UNDEFINED + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: 0x02dc, # SMALL TILDE + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: None, # UNDEFINED + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: None, # UNDEFINED + 0x009d: None, # UNDEFINED + 0x009e: None, # UNDEFINED + 0x009f: None, # UNDEFINED + 0x00a4: 0x20aa, # NEW SHEQEL SIGN + 0x00aa: 0x00d7, # MULTIPLICATION SIGN + 0x00ba: 0x00f7, # DIVISION SIGN + 0x00c0: 0x05b0, # HEBREW POINT SHEVA + 0x00c1: 0x05b1, # HEBREW POINT HATAF SEGOL + 0x00c2: 0x05b2, # HEBREW POINT HATAF PATAH + 0x00c3: 0x05b3, # HEBREW POINT HATAF QAMATS + 0x00c4: 0x05b4, # HEBREW POINT HIRIQ + 0x00c5: 0x05b5, # HEBREW POINT TSERE + 0x00c6: 0x05b6, # HEBREW POINT SEGOL + 0x00c7: 0x05b7, # HEBREW POINT PATAH + 0x00c8: 0x05b8, # HEBREW POINT QAMATS + 0x00c9: 0x05b9, # HEBREW POINT HOLAM + 0x00ca: None, # UNDEFINED + 0x00cb: 0x05bb, # HEBREW POINT QUBUTS + 0x00cc: 0x05bc, # HEBREW POINT DAGESH OR MAPIQ + 0x00cd: 0x05bd, # HEBREW POINT METEG + 0x00ce: 0x05be, # HEBREW PUNCTUATION MAQAF + 0x00cf: 0x05bf, # HEBREW POINT RAFE + 0x00d0: 0x05c0, # HEBREW PUNCTUATION PASEQ + 0x00d1: 0x05c1, # HEBREW POINT SHIN DOT + 0x00d2: 0x05c2, # HEBREW POINT SIN DOT + 0x00d3: 0x05c3, # HEBREW PUNCTUATION SOF PASUQ + 0x00d4: 0x05f0, # HEBREW LIGATURE YIDDISH DOUBLE VAV + 0x00d5: 0x05f1, # HEBREW LIGATURE YIDDISH VAV YOD + 0x00d6: 0x05f2, # HEBREW LIGATURE YIDDISH DOUBLE YOD + 0x00d7: 0x05f3, # HEBREW PUNCTUATION GERESH + 0x00d8: 0x05f4, # HEBREW PUNCTUATION GERSHAYIM + 0x00d9: None, # UNDEFINED + 0x00da: None, # UNDEFINED + 0x00db: None, # UNDEFINED + 0x00dc: None, # UNDEFINED + 0x00dd: None, # UNDEFINED + 0x00de: None, # UNDEFINED + 0x00df: None, # UNDEFINED + 0x00e0: 0x05d0, # HEBREW LETTER ALEF + 0x00e1: 0x05d1, # HEBREW LETTER BET + 0x00e2: 0x05d2, # HEBREW LETTER GIMEL + 0x00e3: 0x05d3, # HEBREW LETTER DALET + 0x00e4: 0x05d4, # HEBREW LETTER HE + 0x00e5: 0x05d5, # HEBREW LETTER VAV + 0x00e6: 0x05d6, # HEBREW LETTER ZAYIN + 0x00e7: 0x05d7, # HEBREW LETTER HET + 0x00e8: 0x05d8, # HEBREW LETTER TET + 0x00e9: 0x05d9, # HEBREW LETTER YOD + 0x00ea: 0x05da, # HEBREW LETTER FINAL KAF + 0x00eb: 0x05db, # HEBREW LETTER KAF + 0x00ec: 0x05dc, # HEBREW LETTER LAMED + 0x00ed: 0x05dd, # HEBREW LETTER FINAL MEM + 0x00ee: 0x05de, # HEBREW LETTER MEM + 0x00ef: 0x05df, # HEBREW LETTER FINAL NUN + 0x00f0: 0x05e0, # HEBREW LETTER NUN + 0x00f1: 0x05e1, # HEBREW LETTER SAMEKH + 0x00f2: 0x05e2, # HEBREW LETTER AYIN + 0x00f3: 0x05e3, # HEBREW LETTER FINAL PE + 0x00f4: 0x05e4, # HEBREW LETTER PE + 0x00f5: 0x05e5, # HEBREW LETTER FINAL TSADI + 0x00f6: 0x05e6, # HEBREW LETTER TSADI + 0x00f7: 0x05e7, # HEBREW LETTER QOF + 0x00f8: 0x05e8, # HEBREW LETTER RESH + 0x00f9: 0x05e9, # HEBREW LETTER SHIN + 0x00fa: 0x05ea, # HEBREW LETTER TAV + 0x00fb: None, # UNDEFINED + 0x00fc: None, # UNDEFINED + 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK + 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK + 0x00ff: None, # UNDEFINED }) ### Encoding Map diff --git a/Lib/encodings/cp1256.py b/Lib/encodings/cp1256.py index 40ed577..f3e694c 100644 --- a/Lib/encodings/cp1256.py +++ b/Lib/encodings/cp1256.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,91 +37,91 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: 0x067e, # ARABIC LETTER PEH - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: 0x0679, # ARABIC LETTER TTEH - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x008d: 0x0686, # ARABIC LETTER TCHEH - 0x008e: 0x0698, # ARABIC LETTER JEH - 0x008f: 0x0688, # ARABIC LETTER DDAL - 0x0090: 0x06af, # ARABIC LETTER GAF - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: 0x06a9, # ARABIC LETTER KEHEH - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: 0x0691, # ARABIC LETTER RREH - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: 0x0153, # LATIN SMALL LIGATURE OE - 0x009d: 0x200c, # ZERO WIDTH NON-JOINER - 0x009e: 0x200d, # ZERO WIDTH JOINER - 0x009f: 0x06ba, # ARABIC LETTER NOON GHUNNA - 0x00a1: 0x060c, # ARABIC COMMA - 0x00aa: 0x06be, # ARABIC LETTER HEH DOACHASHMEE - 0x00ba: 0x061b, # ARABIC SEMICOLON - 0x00bf: 0x061f, # ARABIC QUESTION MARK - 0x00c0: 0x06c1, # ARABIC LETTER HEH GOAL - 0x00c1: 0x0621, # ARABIC LETTER HAMZA - 0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE - 0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE - 0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE - 0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW - 0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE - 0x00c7: 0x0627, # ARABIC LETTER ALEF - 0x00c8: 0x0628, # ARABIC LETTER BEH - 0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA - 0x00ca: 0x062a, # ARABIC LETTER TEH - 0x00cb: 0x062b, # ARABIC LETTER THEH - 0x00cc: 0x062c, # ARABIC LETTER JEEM - 0x00cd: 0x062d, # ARABIC LETTER HAH - 0x00ce: 0x062e, # ARABIC LETTER KHAH - 0x00cf: 0x062f, # ARABIC LETTER DAL - 0x00d0: 0x0630, # ARABIC LETTER THAL - 0x00d1: 0x0631, # ARABIC LETTER REH - 0x00d2: 0x0632, # ARABIC LETTER ZAIN - 0x00d3: 0x0633, # ARABIC LETTER SEEN - 0x00d4: 0x0634, # ARABIC LETTER SHEEN - 0x00d5: 0x0635, # ARABIC LETTER SAD - 0x00d6: 0x0636, # ARABIC LETTER DAD - 0x00d8: 0x0637, # ARABIC LETTER TAH - 0x00d9: 0x0638, # ARABIC LETTER ZAH - 0x00da: 0x0639, # ARABIC LETTER AIN - 0x00db: 0x063a, # ARABIC LETTER GHAIN - 0x00dc: 0x0640, # ARABIC TATWEEL - 0x00dd: 0x0641, # ARABIC LETTER FEH - 0x00de: 0x0642, # ARABIC LETTER QAF - 0x00df: 0x0643, # ARABIC LETTER KAF - 0x00e1: 0x0644, # ARABIC LETTER LAM - 0x00e3: 0x0645, # ARABIC LETTER MEEM - 0x00e4: 0x0646, # ARABIC LETTER NOON - 0x00e5: 0x0647, # ARABIC LETTER HEH - 0x00e6: 0x0648, # ARABIC LETTER WAW - 0x00ec: 0x0649, # ARABIC LETTER ALEF MAKSURA - 0x00ed: 0x064a, # ARABIC LETTER YEH - 0x00f0: 0x064b, # ARABIC FATHATAN - 0x00f1: 0x064c, # ARABIC DAMMATAN - 0x00f2: 0x064d, # ARABIC KASRATAN - 0x00f3: 0x064e, # ARABIC FATHA - 0x00f5: 0x064f, # ARABIC DAMMA - 0x00f6: 0x0650, # ARABIC KASRA - 0x00f8: 0x0651, # ARABIC SHADDA - 0x00fa: 0x0652, # ARABIC SUKUN - 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK - 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK - 0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE + 0x0080: 0x20ac, # EURO SIGN + 0x0081: 0x067e, # ARABIC LETTER PEH + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: 0x0679, # ARABIC LETTER TTEH + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE + 0x008d: 0x0686, # ARABIC LETTER TCHEH + 0x008e: 0x0698, # ARABIC LETTER JEH + 0x008f: 0x0688, # ARABIC LETTER DDAL + 0x0090: 0x06af, # ARABIC LETTER GAF + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: 0x06a9, # ARABIC LETTER KEHEH + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: 0x0691, # ARABIC LETTER RREH + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: 0x0153, # LATIN SMALL LIGATURE OE + 0x009d: 0x200c, # ZERO WIDTH NON-JOINER + 0x009e: 0x200d, # ZERO WIDTH JOINER + 0x009f: 0x06ba, # ARABIC LETTER NOON GHUNNA + 0x00a1: 0x060c, # ARABIC COMMA + 0x00aa: 0x06be, # ARABIC LETTER HEH DOACHASHMEE + 0x00ba: 0x061b, # ARABIC SEMICOLON + 0x00bf: 0x061f, # ARABIC QUESTION MARK + 0x00c0: 0x06c1, # ARABIC LETTER HEH GOAL + 0x00c1: 0x0621, # ARABIC LETTER HAMZA + 0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE + 0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE + 0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE + 0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW + 0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE + 0x00c7: 0x0627, # ARABIC LETTER ALEF + 0x00c8: 0x0628, # ARABIC LETTER BEH + 0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA + 0x00ca: 0x062a, # ARABIC LETTER TEH + 0x00cb: 0x062b, # ARABIC LETTER THEH + 0x00cc: 0x062c, # ARABIC LETTER JEEM + 0x00cd: 0x062d, # ARABIC LETTER HAH + 0x00ce: 0x062e, # ARABIC LETTER KHAH + 0x00cf: 0x062f, # ARABIC LETTER DAL + 0x00d0: 0x0630, # ARABIC LETTER THAL + 0x00d1: 0x0631, # ARABIC LETTER REH + 0x00d2: 0x0632, # ARABIC LETTER ZAIN + 0x00d3: 0x0633, # ARABIC LETTER SEEN + 0x00d4: 0x0634, # ARABIC LETTER SHEEN + 0x00d5: 0x0635, # ARABIC LETTER SAD + 0x00d6: 0x0636, # ARABIC LETTER DAD + 0x00d8: 0x0637, # ARABIC LETTER TAH + 0x00d9: 0x0638, # ARABIC LETTER ZAH + 0x00da: 0x0639, # ARABIC LETTER AIN + 0x00db: 0x063a, # ARABIC LETTER GHAIN + 0x00dc: 0x0640, # ARABIC TATWEEL + 0x00dd: 0x0641, # ARABIC LETTER FEH + 0x00de: 0x0642, # ARABIC LETTER QAF + 0x00df: 0x0643, # ARABIC LETTER KAF + 0x00e1: 0x0644, # ARABIC LETTER LAM + 0x00e3: 0x0645, # ARABIC LETTER MEEM + 0x00e4: 0x0646, # ARABIC LETTER NOON + 0x00e5: 0x0647, # ARABIC LETTER HEH + 0x00e6: 0x0648, # ARABIC LETTER WAW + 0x00ec: 0x0649, # ARABIC LETTER ALEF MAKSURA + 0x00ed: 0x064a, # ARABIC LETTER YEH + 0x00f0: 0x064b, # ARABIC FATHATAN + 0x00f1: 0x064c, # ARABIC DAMMATAN + 0x00f2: 0x064d, # ARABIC KASRATAN + 0x00f3: 0x064e, # ARABIC FATHA + 0x00f5: 0x064f, # ARABIC DAMMA + 0x00f6: 0x0650, # ARABIC KASRA + 0x00f8: 0x0651, # ARABIC SHADDA + 0x00fa: 0x0652, # ARABIC SUKUN + 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK + 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK + 0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE }) ### Encoding Map diff --git a/Lib/encodings/cp1257.py b/Lib/encodings/cp1257.py index baaf367..1a21850 100644 --- a/Lib/encodings/cp1257.py +++ b/Lib/encodings/cp1257.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,93 +37,93 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: None, # UNDEFINED - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: None, # UNDEFINED - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: None, # UNDEFINED - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: None, # UNDEFINED - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: None, # UNDEFINED - 0x008d: 0x00a8, # DIAERESIS - 0x008e: 0x02c7, # CARON - 0x008f: 0x00b8, # CEDILLA - 0x0090: None, # UNDEFINED - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: None, # UNDEFINED - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: None, # UNDEFINED - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: None, # UNDEFINED - 0x009d: 0x00af, # MACRON - 0x009e: 0x02db, # OGONEK - 0x009f: None, # UNDEFINED - 0x00a1: None, # UNDEFINED - 0x00a5: None, # UNDEFINED - 0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00aa: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x00af: 0x00c6, # LATIN CAPITAL LETTER AE - 0x00b8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x00ba: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA - 0x00bf: 0x00e6, # LATIN SMALL LETTER AE - 0x00c0: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00c1: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK - 0x00c2: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON - 0x00c3: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x00c6: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00c7: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON - 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00ca: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x00cb: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x00cc: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x00cd: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x00ce: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON - 0x00cf: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x00d0: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00d2: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x00d4: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON - 0x00d8: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK - 0x00d9: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x00da: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x00db: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON - 0x00dd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00de: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00e0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00e1: 0x012f, # LATIN SMALL LETTER I WITH OGONEK - 0x00e2: 0x0101, # LATIN SMALL LETTER A WITH MACRON - 0x00e3: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x00e6: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00e7: 0x0113, # LATIN SMALL LETTER E WITH MACRON - 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00ea: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00eb: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x00ec: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA - 0x00ed: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA - 0x00ee: 0x012b, # LATIN SMALL LETTER I WITH MACRON - 0x00ef: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA - 0x00f0: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00f2: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA - 0x00f4: 0x014d, # LATIN SMALL LETTER O WITH MACRON - 0x00f8: 0x0173, # LATIN SMALL LETTER U WITH OGONEK - 0x00f9: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x00fa: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x00fb: 0x016b, # LATIN SMALL LETTER U WITH MACRON - 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00ff: 0x02d9, # DOT ABOVE + 0x0080: 0x20ac, # EURO SIGN + 0x0081: None, # UNDEFINED + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: None, # UNDEFINED + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: None, # UNDEFINED + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: None, # UNDEFINED + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: None, # UNDEFINED + 0x008d: 0x00a8, # DIAERESIS + 0x008e: 0x02c7, # CARON + 0x008f: 0x00b8, # CEDILLA + 0x0090: None, # UNDEFINED + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: None, # UNDEFINED + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: None, # UNDEFINED + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: None, # UNDEFINED + 0x009d: 0x00af, # MACRON + 0x009e: 0x02db, # OGONEK + 0x009f: None, # UNDEFINED + 0x00a1: None, # UNDEFINED + 0x00a5: None, # UNDEFINED + 0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00aa: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x00af: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00b8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00ba: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA + 0x00bf: 0x00e6, # LATIN SMALL LETTER AE + 0x00c0: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00c1: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK + 0x00c2: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON + 0x00c3: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x00c6: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00c7: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON + 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00ca: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x00cb: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x00cc: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x00cd: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x00ce: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON + 0x00cf: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x00d0: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00d2: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x00d4: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON + 0x00d8: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK + 0x00d9: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE + 0x00da: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x00db: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON + 0x00dd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00de: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00e0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00e1: 0x012f, # LATIN SMALL LETTER I WITH OGONEK + 0x00e2: 0x0101, # LATIN SMALL LETTER A WITH MACRON + 0x00e3: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x00e6: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00e7: 0x0113, # LATIN SMALL LETTER E WITH MACRON + 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00ea: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x00eb: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x00ec: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA + 0x00ed: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA + 0x00ee: 0x012b, # LATIN SMALL LETTER I WITH MACRON + 0x00ef: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA + 0x00f0: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00f2: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA + 0x00f4: 0x014d, # LATIN SMALL LETTER O WITH MACRON + 0x00f8: 0x0173, # LATIN SMALL LETTER U WITH OGONEK + 0x00f9: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x00fa: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x00fb: 0x016b, # LATIN SMALL LETTER U WITH MACRON + 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00ff: 0x02d9, # DOT ABOVE }) ### Encoding Map diff --git a/Lib/encodings/cp1258.py b/Lib/encodings/cp1258.py index 9a2827d..03a6d3b 100644 --- a/Lib/encodings/cp1258.py +++ b/Lib/encodings/cp1258.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,52 +37,52 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: None, # UNDEFINED - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: None, # UNDEFINED - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x008d: None, # UNDEFINED - 0x008e: None, # UNDEFINED - 0x008f: None, # UNDEFINED - 0x0090: None, # UNDEFINED - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: 0x02dc, # SMALL TILDE - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: None, # UNDEFINED - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: 0x0153, # LATIN SMALL LIGATURE OE - 0x009d: None, # UNDEFINED - 0x009e: None, # UNDEFINED - 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE - 0x00cc: 0x0300, # COMBINING GRAVE ACCENT - 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE - 0x00d2: 0x0309, # COMBINING HOOK ABOVE - 0x00d5: 0x01a0, # LATIN CAPITAL LETTER O WITH HORN - 0x00dd: 0x01af, # LATIN CAPITAL LETTER U WITH HORN - 0x00de: 0x0303, # COMBINING TILDE - 0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE - 0x00ec: 0x0301, # COMBINING ACUTE ACCENT - 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE - 0x00f2: 0x0323, # COMBINING DOT BELOW - 0x00f5: 0x01a1, # LATIN SMALL LETTER O WITH HORN - 0x00fd: 0x01b0, # LATIN SMALL LETTER U WITH HORN - 0x00fe: 0x20ab, # DONG SIGN + 0x0080: 0x20ac, # EURO SIGN + 0x0081: None, # UNDEFINED + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: None, # UNDEFINED + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE + 0x008d: None, # UNDEFINED + 0x008e: None, # UNDEFINED + 0x008f: None, # UNDEFINED + 0x0090: None, # UNDEFINED + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: 0x02dc, # SMALL TILDE + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: None, # UNDEFINED + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: 0x0153, # LATIN SMALL LIGATURE OE + 0x009d: None, # UNDEFINED + 0x009e: None, # UNDEFINED + 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE + 0x00cc: 0x0300, # COMBINING GRAVE ACCENT + 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE + 0x00d2: 0x0309, # COMBINING HOOK ABOVE + 0x00d5: 0x01a0, # LATIN CAPITAL LETTER O WITH HORN + 0x00dd: 0x01af, # LATIN CAPITAL LETTER U WITH HORN + 0x00de: 0x0303, # COMBINING TILDE + 0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE + 0x00ec: 0x0301, # COMBINING ACUTE ACCENT + 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE + 0x00f2: 0x0323, # COMBINING DOT BELOW + 0x00f5: 0x01a1, # LATIN SMALL LETTER O WITH HORN + 0x00fd: 0x01b0, # LATIN SMALL LETTER U WITH HORN + 0x00fe: 0x20ab, # DONG SIGN }) ### Encoding Map diff --git a/Lib/encodings/cp424.py b/Lib/encodings/cp424.py index 4689996..6a53233 100644 --- a/Lib/encodings/cp424.py +++ b/Lib/encodings/cp424.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,242 +37,242 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0004: 0x009c, # SELECT - 0x0005: 0x0009, # HORIZONTAL TABULATION - 0x0006: 0x0086, # REQUIRED NEW LINE - 0x0007: 0x007f, # DELETE - 0x0008: 0x0097, # GRAPHIC ESCAPE - 0x0009: 0x008d, # SUPERSCRIPT - 0x000a: 0x008e, # REPEAT - 0x0014: 0x009d, # RESTORE/ENABLE PRESENTATION - 0x0015: 0x0085, # NEW LINE - 0x0016: 0x0008, # BACKSPACE - 0x0017: 0x0087, # PROGRAM OPERATOR COMMUNICATION - 0x001a: 0x0092, # UNIT BACK SPACE - 0x001b: 0x008f, # CUSTOMER USE ONE - 0x0020: 0x0080, # DIGIT SELECT - 0x0021: 0x0081, # START OF SIGNIFICANCE - 0x0022: 0x0082, # FIELD SEPARATOR - 0x0023: 0x0083, # WORD UNDERSCORE - 0x0024: 0x0084, # BYPASS OR INHIBIT PRESENTATION - 0x0025: 0x000a, # LINE FEED - 0x0026: 0x0017, # END OF TRANSMISSION BLOCK - 0x0027: 0x001b, # ESCAPE - 0x0028: 0x0088, # SET ATTRIBUTE - 0x0029: 0x0089, # START FIELD EXTENDED - 0x002a: 0x008a, # SET MODE OR SWITCH - 0x002b: 0x008b, # CONTROL SEQUENCE PREFIX - 0x002c: 0x008c, # MODIFY FIELD ATTRIBUTE - 0x002d: 0x0005, # ENQUIRY - 0x002e: 0x0006, # ACKNOWLEDGE - 0x002f: 0x0007, # BELL - 0x0030: 0x0090, # <reserved> - 0x0031: 0x0091, # <reserved> - 0x0032: 0x0016, # SYNCHRONOUS IDLE - 0x0033: 0x0093, # INDEX RETURN - 0x0034: 0x0094, # PRESENTATION POSITION - 0x0035: 0x0095, # TRANSPARENT - 0x0036: 0x0096, # NUMERIC BACKSPACE - 0x0037: 0x0004, # END OF TRANSMISSION - 0x0038: 0x0098, # SUBSCRIPT - 0x0039: 0x0099, # INDENT TABULATION - 0x003a: 0x009a, # REVERSE FORM FEED - 0x003b: 0x009b, # CUSTOMER USE THREE - 0x003c: 0x0014, # DEVICE CONTROL FOUR - 0x003d: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x003e: 0x009e, # <reserved> - 0x003f: 0x001a, # SUBSTITUTE - 0x0040: 0x0020, # SPACE - 0x0041: 0x05d0, # HEBREW LETTER ALEF - 0x0042: 0x05d1, # HEBREW LETTER BET - 0x0043: 0x05d2, # HEBREW LETTER GIMEL - 0x0044: 0x05d3, # HEBREW LETTER DALET - 0x0045: 0x05d4, # HEBREW LETTER HE - 0x0046: 0x05d5, # HEBREW LETTER VAV - 0x0047: 0x05d6, # HEBREW LETTER ZAYIN - 0x0048: 0x05d7, # HEBREW LETTER HET - 0x0049: 0x05d8, # HEBREW LETTER TET - 0x004a: 0x00a2, # CENT SIGN - 0x004b: 0x002e, # FULL STOP - 0x004c: 0x003c, # LESS-THAN SIGN - 0x004d: 0x0028, # LEFT PARENTHESIS - 0x004e: 0x002b, # PLUS SIGN - 0x004f: 0x007c, # VERTICAL LINE - 0x0050: 0x0026, # AMPERSAND - 0x0051: 0x05d9, # HEBREW LETTER YOD - 0x0052: 0x05da, # HEBREW LETTER FINAL KAF - 0x0053: 0x05db, # HEBREW LETTER KAF - 0x0054: 0x05dc, # HEBREW LETTER LAMED - 0x0055: 0x05dd, # HEBREW LETTER FINAL MEM - 0x0056: 0x05de, # HEBREW LETTER MEM - 0x0057: 0x05df, # HEBREW LETTER FINAL NUN - 0x0058: 0x05e0, # HEBREW LETTER NUN - 0x0059: 0x05e1, # HEBREW LETTER SAMEKH - 0x005a: 0x0021, # EXCLAMATION MARK - 0x005b: 0x0024, # DOLLAR SIGN - 0x005c: 0x002a, # ASTERISK - 0x005d: 0x0029, # RIGHT PARENTHESIS - 0x005e: 0x003b, # SEMICOLON - 0x005f: 0x00ac, # NOT SIGN - 0x0060: 0x002d, # HYPHEN-MINUS - 0x0061: 0x002f, # SOLIDUS - 0x0062: 0x05e2, # HEBREW LETTER AYIN - 0x0063: 0x05e3, # HEBREW LETTER FINAL PE - 0x0064: 0x05e4, # HEBREW LETTER PE - 0x0065: 0x05e5, # HEBREW LETTER FINAL TSADI - 0x0066: 0x05e6, # HEBREW LETTER TSADI - 0x0067: 0x05e7, # HEBREW LETTER QOF - 0x0068: 0x05e8, # HEBREW LETTER RESH - 0x0069: 0x05e9, # HEBREW LETTER SHIN - 0x006a: 0x00a6, # BROKEN BAR - 0x006b: 0x002c, # COMMA - 0x006c: 0x0025, # PERCENT SIGN - 0x006d: 0x005f, # LOW LINE - 0x006e: 0x003e, # GREATER-THAN SIGN - 0x006f: 0x003f, # QUESTION MARK - 0x0070: None, # UNDEFINED - 0x0071: 0x05ea, # HEBREW LETTER TAV - 0x0072: None, # UNDEFINED - 0x0073: None, # UNDEFINED - 0x0074: 0x00a0, # NO-BREAK SPACE - 0x0075: None, # UNDEFINED - 0x0076: None, # UNDEFINED - 0x0077: None, # UNDEFINED - 0x0078: 0x2017, # DOUBLE LOW LINE - 0x0079: 0x0060, # GRAVE ACCENT - 0x007a: 0x003a, # COLON - 0x007b: 0x0023, # NUMBER SIGN - 0x007c: 0x0040, # COMMERCIAL AT - 0x007d: 0x0027, # APOSTROPHE - 0x007e: 0x003d, # EQUALS SIGN - 0x007f: 0x0022, # QUOTATION MARK - 0x0080: None, # UNDEFINED - 0x0081: 0x0061, # LATIN SMALL LETTER A - 0x0082: 0x0062, # LATIN SMALL LETTER B - 0x0083: 0x0063, # LATIN SMALL LETTER C - 0x0084: 0x0064, # LATIN SMALL LETTER D - 0x0085: 0x0065, # LATIN SMALL LETTER E - 0x0086: 0x0066, # LATIN SMALL LETTER F - 0x0087: 0x0067, # LATIN SMALL LETTER G - 0x0088: 0x0068, # LATIN SMALL LETTER H - 0x0089: 0x0069, # LATIN SMALL LETTER I - 0x008a: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x008b: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x008c: None, # UNDEFINED - 0x008d: None, # UNDEFINED - 0x008e: None, # UNDEFINED - 0x008f: 0x00b1, # PLUS-MINUS SIGN - 0x0090: 0x00b0, # DEGREE SIGN - 0x0091: 0x006a, # LATIN SMALL LETTER J - 0x0092: 0x006b, # LATIN SMALL LETTER K - 0x0093: 0x006c, # LATIN SMALL LETTER L - 0x0094: 0x006d, # LATIN SMALL LETTER M - 0x0095: 0x006e, # LATIN SMALL LETTER N - 0x0096: 0x006f, # LATIN SMALL LETTER O - 0x0097: 0x0070, # LATIN SMALL LETTER P - 0x0098: 0x0071, # LATIN SMALL LETTER Q - 0x0099: 0x0072, # LATIN SMALL LETTER R - 0x009a: None, # UNDEFINED - 0x009b: None, # UNDEFINED - 0x009c: None, # UNDEFINED - 0x009d: 0x00b8, # CEDILLA - 0x009e: None, # UNDEFINED - 0x009f: 0x00a4, # CURRENCY SIGN - 0x00a0: 0x00b5, # MICRO SIGN - 0x00a1: 0x007e, # TILDE - 0x00a2: 0x0073, # LATIN SMALL LETTER S - 0x00a3: 0x0074, # LATIN SMALL LETTER T - 0x00a4: 0x0075, # LATIN SMALL LETTER U - 0x00a5: 0x0076, # LATIN SMALL LETTER V - 0x00a6: 0x0077, # LATIN SMALL LETTER W - 0x00a7: 0x0078, # LATIN SMALL LETTER X - 0x00a8: 0x0079, # LATIN SMALL LETTER Y - 0x00a9: 0x007a, # LATIN SMALL LETTER Z - 0x00aa: None, # UNDEFINED - 0x00ab: None, # UNDEFINED - 0x00ac: None, # UNDEFINED - 0x00ad: None, # UNDEFINED - 0x00ae: None, # UNDEFINED - 0x00af: 0x00ae, # REGISTERED SIGN - 0x00b0: 0x005e, # CIRCUMFLEX ACCENT - 0x00b1: 0x00a3, # POUND SIGN - 0x00b2: 0x00a5, # YEN SIGN - 0x00b3: 0x00b7, # MIDDLE DOT - 0x00b4: 0x00a9, # COPYRIGHT SIGN - 0x00b5: 0x00a7, # SECTION SIGN - 0x00b7: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00b8: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00b9: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00ba: 0x005b, # LEFT SQUARE BRACKET - 0x00bb: 0x005d, # RIGHT SQUARE BRACKET - 0x00bc: 0x00af, # MACRON - 0x00bd: 0x00a8, # DIAERESIS - 0x00be: 0x00b4, # ACUTE ACCENT - 0x00bf: 0x00d7, # MULTIPLICATION SIGN - 0x00c0: 0x007b, # LEFT CURLY BRACKET - 0x00c1: 0x0041, # LATIN CAPITAL LETTER A - 0x00c2: 0x0042, # LATIN CAPITAL LETTER B - 0x00c3: 0x0043, # LATIN CAPITAL LETTER C - 0x00c4: 0x0044, # LATIN CAPITAL LETTER D - 0x00c5: 0x0045, # LATIN CAPITAL LETTER E - 0x00c6: 0x0046, # LATIN CAPITAL LETTER F - 0x00c7: 0x0047, # LATIN CAPITAL LETTER G - 0x00c8: 0x0048, # LATIN CAPITAL LETTER H - 0x00c9: 0x0049, # LATIN CAPITAL LETTER I - 0x00ca: 0x00ad, # SOFT HYPHEN - 0x00cb: None, # UNDEFINED - 0x00cc: None, # UNDEFINED - 0x00cd: None, # UNDEFINED - 0x00ce: None, # UNDEFINED - 0x00cf: None, # UNDEFINED - 0x00d0: 0x007d, # RIGHT CURLY BRACKET - 0x00d1: 0x004a, # LATIN CAPITAL LETTER J - 0x00d2: 0x004b, # LATIN CAPITAL LETTER K - 0x00d3: 0x004c, # LATIN CAPITAL LETTER L - 0x00d4: 0x004d, # LATIN CAPITAL LETTER M - 0x00d5: 0x004e, # LATIN CAPITAL LETTER N - 0x00d6: 0x004f, # LATIN CAPITAL LETTER O - 0x00d7: 0x0050, # LATIN CAPITAL LETTER P - 0x00d8: 0x0051, # LATIN CAPITAL LETTER Q - 0x00d9: 0x0052, # LATIN CAPITAL LETTER R - 0x00da: 0x00b9, # SUPERSCRIPT ONE - 0x00db: None, # UNDEFINED - 0x00dc: None, # UNDEFINED - 0x00dd: None, # UNDEFINED - 0x00de: None, # UNDEFINED - 0x00df: None, # UNDEFINED - 0x00e0: 0x005c, # REVERSE SOLIDUS - 0x00e1: 0x00f7, # DIVISION SIGN - 0x00e2: 0x0053, # LATIN CAPITAL LETTER S - 0x00e3: 0x0054, # LATIN CAPITAL LETTER T - 0x00e4: 0x0055, # LATIN CAPITAL LETTER U - 0x00e5: 0x0056, # LATIN CAPITAL LETTER V - 0x00e6: 0x0057, # LATIN CAPITAL LETTER W - 0x00e7: 0x0058, # LATIN CAPITAL LETTER X - 0x00e8: 0x0059, # LATIN CAPITAL LETTER Y - 0x00e9: 0x005a, # LATIN CAPITAL LETTER Z - 0x00ea: 0x00b2, # SUPERSCRIPT TWO - 0x00eb: None, # UNDEFINED - 0x00ec: None, # UNDEFINED - 0x00ed: None, # UNDEFINED - 0x00ee: None, # UNDEFINED - 0x00ef: None, # UNDEFINED - 0x00f0: 0x0030, # DIGIT ZERO - 0x00f1: 0x0031, # DIGIT ONE - 0x00f2: 0x0032, # DIGIT TWO - 0x00f3: 0x0033, # DIGIT THREE - 0x00f4: 0x0034, # DIGIT FOUR - 0x00f5: 0x0035, # DIGIT FIVE - 0x00f6: 0x0036, # DIGIT SIX - 0x00f7: 0x0037, # DIGIT SEVEN - 0x00f8: 0x0038, # DIGIT EIGHT - 0x00f9: 0x0039, # DIGIT NINE - 0x00fa: 0x00b3, # SUPERSCRIPT THREE - 0x00fb: None, # UNDEFINED - 0x00fc: None, # UNDEFINED - 0x00fd: None, # UNDEFINED - 0x00fe: None, # UNDEFINED - 0x00ff: 0x009f, # EIGHT ONES + 0x0004: 0x009c, # SELECT + 0x0005: 0x0009, # HORIZONTAL TABULATION + 0x0006: 0x0086, # REQUIRED NEW LINE + 0x0007: 0x007f, # DELETE + 0x0008: 0x0097, # GRAPHIC ESCAPE + 0x0009: 0x008d, # SUPERSCRIPT + 0x000a: 0x008e, # REPEAT + 0x0014: 0x009d, # RESTORE/ENABLE PRESENTATION + 0x0015: 0x0085, # NEW LINE + 0x0016: 0x0008, # BACKSPACE + 0x0017: 0x0087, # PROGRAM OPERATOR COMMUNICATION + 0x001a: 0x0092, # UNIT BACK SPACE + 0x001b: 0x008f, # CUSTOMER USE ONE + 0x0020: 0x0080, # DIGIT SELECT + 0x0021: 0x0081, # START OF SIGNIFICANCE + 0x0022: 0x0082, # FIELD SEPARATOR + 0x0023: 0x0083, # WORD UNDERSCORE + 0x0024: 0x0084, # BYPASS OR INHIBIT PRESENTATION + 0x0025: 0x000a, # LINE FEED + 0x0026: 0x0017, # END OF TRANSMISSION BLOCK + 0x0027: 0x001b, # ESCAPE + 0x0028: 0x0088, # SET ATTRIBUTE + 0x0029: 0x0089, # START FIELD EXTENDED + 0x002a: 0x008a, # SET MODE OR SWITCH + 0x002b: 0x008b, # CONTROL SEQUENCE PREFIX + 0x002c: 0x008c, # MODIFY FIELD ATTRIBUTE + 0x002d: 0x0005, # ENQUIRY + 0x002e: 0x0006, # ACKNOWLEDGE + 0x002f: 0x0007, # BELL + 0x0030: 0x0090, # <reserved> + 0x0031: 0x0091, # <reserved> + 0x0032: 0x0016, # SYNCHRONOUS IDLE + 0x0033: 0x0093, # INDEX RETURN + 0x0034: 0x0094, # PRESENTATION POSITION + 0x0035: 0x0095, # TRANSPARENT + 0x0036: 0x0096, # NUMERIC BACKSPACE + 0x0037: 0x0004, # END OF TRANSMISSION + 0x0038: 0x0098, # SUBSCRIPT + 0x0039: 0x0099, # INDENT TABULATION + 0x003a: 0x009a, # REVERSE FORM FEED + 0x003b: 0x009b, # CUSTOMER USE THREE + 0x003c: 0x0014, # DEVICE CONTROL FOUR + 0x003d: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x003e: 0x009e, # <reserved> + 0x003f: 0x001a, # SUBSTITUTE + 0x0040: 0x0020, # SPACE + 0x0041: 0x05d0, # HEBREW LETTER ALEF + 0x0042: 0x05d1, # HEBREW LETTER BET + 0x0043: 0x05d2, # HEBREW LETTER GIMEL + 0x0044: 0x05d3, # HEBREW LETTER DALET + 0x0045: 0x05d4, # HEBREW LETTER HE + 0x0046: 0x05d5, # HEBREW LETTER VAV + 0x0047: 0x05d6, # HEBREW LETTER ZAYIN + 0x0048: 0x05d7, # HEBREW LETTER HET + 0x0049: 0x05d8, # HEBREW LETTER TET + 0x004a: 0x00a2, # CENT SIGN + 0x004b: 0x002e, # FULL STOP + 0x004c: 0x003c, # LESS-THAN SIGN + 0x004d: 0x0028, # LEFT PARENTHESIS + 0x004e: 0x002b, # PLUS SIGN + 0x004f: 0x007c, # VERTICAL LINE + 0x0050: 0x0026, # AMPERSAND + 0x0051: 0x05d9, # HEBREW LETTER YOD + 0x0052: 0x05da, # HEBREW LETTER FINAL KAF + 0x0053: 0x05db, # HEBREW LETTER KAF + 0x0054: 0x05dc, # HEBREW LETTER LAMED + 0x0055: 0x05dd, # HEBREW LETTER FINAL MEM + 0x0056: 0x05de, # HEBREW LETTER MEM + 0x0057: 0x05df, # HEBREW LETTER FINAL NUN + 0x0058: 0x05e0, # HEBREW LETTER NUN + 0x0059: 0x05e1, # HEBREW LETTER SAMEKH + 0x005a: 0x0021, # EXCLAMATION MARK + 0x005b: 0x0024, # DOLLAR SIGN + 0x005c: 0x002a, # ASTERISK + 0x005d: 0x0029, # RIGHT PARENTHESIS + 0x005e: 0x003b, # SEMICOLON + 0x005f: 0x00ac, # NOT SIGN + 0x0060: 0x002d, # HYPHEN-MINUS + 0x0061: 0x002f, # SOLIDUS + 0x0062: 0x05e2, # HEBREW LETTER AYIN + 0x0063: 0x05e3, # HEBREW LETTER FINAL PE + 0x0064: 0x05e4, # HEBREW LETTER PE + 0x0065: 0x05e5, # HEBREW LETTER FINAL TSADI + 0x0066: 0x05e6, # HEBREW LETTER TSADI + 0x0067: 0x05e7, # HEBREW LETTER QOF + 0x0068: 0x05e8, # HEBREW LETTER RESH + 0x0069: 0x05e9, # HEBREW LETTER SHIN + 0x006a: 0x00a6, # BROKEN BAR + 0x006b: 0x002c, # COMMA + 0x006c: 0x0025, # PERCENT SIGN + 0x006d: 0x005f, # LOW LINE + 0x006e: 0x003e, # GREATER-THAN SIGN + 0x006f: 0x003f, # QUESTION MARK + 0x0070: None, # UNDEFINED + 0x0071: 0x05ea, # HEBREW LETTER TAV + 0x0072: None, # UNDEFINED + 0x0073: None, # UNDEFINED + 0x0074: 0x00a0, # NO-BREAK SPACE + 0x0075: None, # UNDEFINED + 0x0076: None, # UNDEFINED + 0x0077: None, # UNDEFINED + 0x0078: 0x2017, # DOUBLE LOW LINE + 0x0079: 0x0060, # GRAVE ACCENT + 0x007a: 0x003a, # COLON + 0x007b: 0x0023, # NUMBER SIGN + 0x007c: 0x0040, # COMMERCIAL AT + 0x007d: 0x0027, # APOSTROPHE + 0x007e: 0x003d, # EQUALS SIGN + 0x007f: 0x0022, # QUOTATION MARK + 0x0080: None, # UNDEFINED + 0x0081: 0x0061, # LATIN SMALL LETTER A + 0x0082: 0x0062, # LATIN SMALL LETTER B + 0x0083: 0x0063, # LATIN SMALL LETTER C + 0x0084: 0x0064, # LATIN SMALL LETTER D + 0x0085: 0x0065, # LATIN SMALL LETTER E + 0x0086: 0x0066, # LATIN SMALL LETTER F + 0x0087: 0x0067, # LATIN SMALL LETTER G + 0x0088: 0x0068, # LATIN SMALL LETTER H + 0x0089: 0x0069, # LATIN SMALL LETTER I + 0x008a: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x008b: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x008c: None, # UNDEFINED + 0x008d: None, # UNDEFINED + 0x008e: None, # UNDEFINED + 0x008f: 0x00b1, # PLUS-MINUS SIGN + 0x0090: 0x00b0, # DEGREE SIGN + 0x0091: 0x006a, # LATIN SMALL LETTER J + 0x0092: 0x006b, # LATIN SMALL LETTER K + 0x0093: 0x006c, # LATIN SMALL LETTER L + 0x0094: 0x006d, # LATIN SMALL LETTER M + 0x0095: 0x006e, # LATIN SMALL LETTER N + 0x0096: 0x006f, # LATIN SMALL LETTER O + 0x0097: 0x0070, # LATIN SMALL LETTER P + 0x0098: 0x0071, # LATIN SMALL LETTER Q + 0x0099: 0x0072, # LATIN SMALL LETTER R + 0x009a: None, # UNDEFINED + 0x009b: None, # UNDEFINED + 0x009c: None, # UNDEFINED + 0x009d: 0x00b8, # CEDILLA + 0x009e: None, # UNDEFINED + 0x009f: 0x00a4, # CURRENCY SIGN + 0x00a0: 0x00b5, # MICRO SIGN + 0x00a1: 0x007e, # TILDE + 0x00a2: 0x0073, # LATIN SMALL LETTER S + 0x00a3: 0x0074, # LATIN SMALL LETTER T + 0x00a4: 0x0075, # LATIN SMALL LETTER U + 0x00a5: 0x0076, # LATIN SMALL LETTER V + 0x00a6: 0x0077, # LATIN SMALL LETTER W + 0x00a7: 0x0078, # LATIN SMALL LETTER X + 0x00a8: 0x0079, # LATIN SMALL LETTER Y + 0x00a9: 0x007a, # LATIN SMALL LETTER Z + 0x00aa: None, # UNDEFINED + 0x00ab: None, # UNDEFINED + 0x00ac: None, # UNDEFINED + 0x00ad: None, # UNDEFINED + 0x00ae: None, # UNDEFINED + 0x00af: 0x00ae, # REGISTERED SIGN + 0x00b0: 0x005e, # CIRCUMFLEX ACCENT + 0x00b1: 0x00a3, # POUND SIGN + 0x00b2: 0x00a5, # YEN SIGN + 0x00b3: 0x00b7, # MIDDLE DOT + 0x00b4: 0x00a9, # COPYRIGHT SIGN + 0x00b5: 0x00a7, # SECTION SIGN + 0x00b7: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00b8: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00b9: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00ba: 0x005b, # LEFT SQUARE BRACKET + 0x00bb: 0x005d, # RIGHT SQUARE BRACKET + 0x00bc: 0x00af, # MACRON + 0x00bd: 0x00a8, # DIAERESIS + 0x00be: 0x00b4, # ACUTE ACCENT + 0x00bf: 0x00d7, # MULTIPLICATION SIGN + 0x00c0: 0x007b, # LEFT CURLY BRACKET + 0x00c1: 0x0041, # LATIN CAPITAL LETTER A + 0x00c2: 0x0042, # LATIN CAPITAL LETTER B + 0x00c3: 0x0043, # LATIN CAPITAL LETTER C + 0x00c4: 0x0044, # LATIN CAPITAL LETTER D + 0x00c5: 0x0045, # LATIN CAPITAL LETTER E + 0x00c6: 0x0046, # LATIN CAPITAL LETTER F + 0x00c7: 0x0047, # LATIN CAPITAL LETTER G + 0x00c8: 0x0048, # LATIN CAPITAL LETTER H + 0x00c9: 0x0049, # LATIN CAPITAL LETTER I + 0x00ca: 0x00ad, # SOFT HYPHEN + 0x00cb: None, # UNDEFINED + 0x00cc: None, # UNDEFINED + 0x00cd: None, # UNDEFINED + 0x00ce: None, # UNDEFINED + 0x00cf: None, # UNDEFINED + 0x00d0: 0x007d, # RIGHT CURLY BRACKET + 0x00d1: 0x004a, # LATIN CAPITAL LETTER J + 0x00d2: 0x004b, # LATIN CAPITAL LETTER K + 0x00d3: 0x004c, # LATIN CAPITAL LETTER L + 0x00d4: 0x004d, # LATIN CAPITAL LETTER M + 0x00d5: 0x004e, # LATIN CAPITAL LETTER N + 0x00d6: 0x004f, # LATIN CAPITAL LETTER O + 0x00d7: 0x0050, # LATIN CAPITAL LETTER P + 0x00d8: 0x0051, # LATIN CAPITAL LETTER Q + 0x00d9: 0x0052, # LATIN CAPITAL LETTER R + 0x00da: 0x00b9, # SUPERSCRIPT ONE + 0x00db: None, # UNDEFINED + 0x00dc: None, # UNDEFINED + 0x00dd: None, # UNDEFINED + 0x00de: None, # UNDEFINED + 0x00df: None, # UNDEFINED + 0x00e0: 0x005c, # REVERSE SOLIDUS + 0x00e1: 0x00f7, # DIVISION SIGN + 0x00e2: 0x0053, # LATIN CAPITAL LETTER S + 0x00e3: 0x0054, # LATIN CAPITAL LETTER T + 0x00e4: 0x0055, # LATIN CAPITAL LETTER U + 0x00e5: 0x0056, # LATIN CAPITAL LETTER V + 0x00e6: 0x0057, # LATIN CAPITAL LETTER W + 0x00e7: 0x0058, # LATIN CAPITAL LETTER X + 0x00e8: 0x0059, # LATIN CAPITAL LETTER Y + 0x00e9: 0x005a, # LATIN CAPITAL LETTER Z + 0x00ea: 0x00b2, # SUPERSCRIPT TWO + 0x00eb: None, # UNDEFINED + 0x00ec: None, # UNDEFINED + 0x00ed: None, # UNDEFINED + 0x00ee: None, # UNDEFINED + 0x00ef: None, # UNDEFINED + 0x00f0: 0x0030, # DIGIT ZERO + 0x00f1: 0x0031, # DIGIT ONE + 0x00f2: 0x0032, # DIGIT TWO + 0x00f3: 0x0033, # DIGIT THREE + 0x00f4: 0x0034, # DIGIT FOUR + 0x00f5: 0x0035, # DIGIT FIVE + 0x00f6: 0x0036, # DIGIT SIX + 0x00f7: 0x0037, # DIGIT SEVEN + 0x00f8: 0x0038, # DIGIT EIGHT + 0x00f9: 0x0039, # DIGIT NINE + 0x00fa: 0x00b3, # SUPERSCRIPT THREE + 0x00fb: None, # UNDEFINED + 0x00fc: None, # UNDEFINED + 0x00fd: None, # UNDEFINED + 0x00fe: None, # UNDEFINED + 0x00ff: 0x009f, # EIGHT ONES }) ### Encoding Map diff --git a/Lib/encodings/cp437.py b/Lib/encodings/cp437.py index a55e424..bfe218f 100644 --- a/Lib/encodings/cp437.py +++ b/Lib/encodings/cp437.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,134 +37,134 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00a2, # CENT SIGN - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00a5, # YEN SIGN - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00a2, # CENT SIGN + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00a5, # YEN SIGN + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Encoding Map diff --git a/Lib/encodings/cp500.py b/Lib/encodings/cp500.py index bc1acde..bc3474f 100644 --- a/Lib/encodings/cp500.py +++ b/Lib/encodings/cp500.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,242 +37,242 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0004: 0x009c, # CONTROL - 0x0005: 0x0009, # HORIZONTAL TABULATION - 0x0006: 0x0086, # CONTROL - 0x0007: 0x007f, # DELETE - 0x0008: 0x0097, # CONTROL - 0x0009: 0x008d, # CONTROL - 0x000a: 0x008e, # CONTROL - 0x0014: 0x009d, # CONTROL - 0x0015: 0x0085, # CONTROL - 0x0016: 0x0008, # BACKSPACE - 0x0017: 0x0087, # CONTROL - 0x001a: 0x0092, # CONTROL - 0x001b: 0x008f, # CONTROL - 0x0020: 0x0080, # CONTROL - 0x0021: 0x0081, # CONTROL - 0x0022: 0x0082, # CONTROL - 0x0023: 0x0083, # CONTROL - 0x0024: 0x0084, # CONTROL - 0x0025: 0x000a, # LINE FEED - 0x0026: 0x0017, # END OF TRANSMISSION BLOCK - 0x0027: 0x001b, # ESCAPE - 0x0028: 0x0088, # CONTROL - 0x0029: 0x0089, # CONTROL - 0x002a: 0x008a, # CONTROL - 0x002b: 0x008b, # CONTROL - 0x002c: 0x008c, # CONTROL - 0x002d: 0x0005, # ENQUIRY - 0x002e: 0x0006, # ACKNOWLEDGE - 0x002f: 0x0007, # BELL - 0x0030: 0x0090, # CONTROL - 0x0031: 0x0091, # CONTROL - 0x0032: 0x0016, # SYNCHRONOUS IDLE - 0x0033: 0x0093, # CONTROL - 0x0034: 0x0094, # CONTROL - 0x0035: 0x0095, # CONTROL - 0x0036: 0x0096, # CONTROL - 0x0037: 0x0004, # END OF TRANSMISSION - 0x0038: 0x0098, # CONTROL - 0x0039: 0x0099, # CONTROL - 0x003a: 0x009a, # CONTROL - 0x003b: 0x009b, # CONTROL - 0x003c: 0x0014, # DEVICE CONTROL FOUR - 0x003d: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x003e: 0x009e, # CONTROL - 0x003f: 0x001a, # SUBSTITUTE - 0x0040: 0x0020, # SPACE - 0x0041: 0x00a0, # NO-BREAK SPACE - 0x0042: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0043: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0044: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0045: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x0046: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x0047: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0048: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0049: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x004a: 0x005b, # LEFT SQUARE BRACKET - 0x004b: 0x002e, # FULL STOP - 0x004c: 0x003c, # LESS-THAN SIGN - 0x004d: 0x0028, # LEFT PARENTHESIS - 0x004e: 0x002b, # PLUS SIGN - 0x004f: 0x0021, # EXCLAMATION MARK - 0x0050: 0x0026, # AMPERSAND - 0x0051: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0052: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0053: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x0054: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x0055: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x0056: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x0057: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x0058: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x0059: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x005a: 0x005d, # RIGHT SQUARE BRACKET - 0x005b: 0x0024, # DOLLAR SIGN - 0x005c: 0x002a, # ASTERISK - 0x005d: 0x0029, # RIGHT PARENTHESIS - 0x005e: 0x003b, # SEMICOLON - 0x005f: 0x005e, # CIRCUMFLEX ACCENT - 0x0060: 0x002d, # HYPHEN-MINUS - 0x0061: 0x002f, # SOLIDUS - 0x0062: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x0063: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x0064: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x0065: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x0066: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x0067: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0068: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0069: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x006a: 0x00a6, # BROKEN BAR - 0x006b: 0x002c, # COMMA - 0x006c: 0x0025, # PERCENT SIGN - 0x006d: 0x005f, # LOW LINE - 0x006e: 0x003e, # GREATER-THAN SIGN - 0x006f: 0x003f, # QUESTION MARK - 0x0070: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x0071: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0072: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x0073: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x0074: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x0075: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x0076: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x0077: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x0078: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x0079: 0x0060, # GRAVE ACCENT - 0x007a: 0x003a, # COLON - 0x007b: 0x0023, # NUMBER SIGN - 0x007c: 0x0040, # COMMERCIAL AT - 0x007d: 0x0027, # APOSTROPHE - 0x007e: 0x003d, # EQUALS SIGN - 0x007f: 0x0022, # QUOTATION MARK - 0x0080: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x0081: 0x0061, # LATIN SMALL LETTER A - 0x0082: 0x0062, # LATIN SMALL LETTER B - 0x0083: 0x0063, # LATIN SMALL LETTER C - 0x0084: 0x0064, # LATIN SMALL LETTER D - 0x0085: 0x0065, # LATIN SMALL LETTER E - 0x0086: 0x0066, # LATIN SMALL LETTER F - 0x0087: 0x0067, # LATIN SMALL LETTER G - 0x0088: 0x0068, # LATIN SMALL LETTER H - 0x0089: 0x0069, # LATIN SMALL LETTER I - 0x008a: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x008b: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x008c: 0x00f0, # LATIN SMALL LETTER ETH (ICELANDIC) - 0x008d: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x008e: 0x00fe, # LATIN SMALL LETTER THORN (ICELANDIC) - 0x008f: 0x00b1, # PLUS-MINUS SIGN - 0x0090: 0x00b0, # DEGREE SIGN - 0x0091: 0x006a, # LATIN SMALL LETTER J - 0x0092: 0x006b, # LATIN SMALL LETTER K - 0x0093: 0x006c, # LATIN SMALL LETTER L - 0x0094: 0x006d, # LATIN SMALL LETTER M - 0x0095: 0x006e, # LATIN SMALL LETTER N - 0x0096: 0x006f, # LATIN SMALL LETTER O - 0x0097: 0x0070, # LATIN SMALL LETTER P - 0x0098: 0x0071, # LATIN SMALL LETTER Q - 0x0099: 0x0072, # LATIN SMALL LETTER R - 0x009a: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x009b: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x009c: 0x00e6, # LATIN SMALL LIGATURE AE - 0x009d: 0x00b8, # CEDILLA - 0x009e: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x009f: 0x00a4, # CURRENCY SIGN - 0x00a0: 0x00b5, # MICRO SIGN - 0x00a1: 0x007e, # TILDE - 0x00a2: 0x0073, # LATIN SMALL LETTER S - 0x00a3: 0x0074, # LATIN SMALL LETTER T - 0x00a4: 0x0075, # LATIN SMALL LETTER U - 0x00a5: 0x0076, # LATIN SMALL LETTER V - 0x00a6: 0x0077, # LATIN SMALL LETTER W - 0x00a7: 0x0078, # LATIN SMALL LETTER X - 0x00a8: 0x0079, # LATIN SMALL LETTER Y - 0x00a9: 0x007a, # LATIN SMALL LETTER Z - 0x00aa: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ab: 0x00bf, # INVERTED QUESTION MARK - 0x00ac: 0x00d0, # LATIN CAPITAL LETTER ETH (ICELANDIC) - 0x00ad: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00ae: 0x00de, # LATIN CAPITAL LETTER THORN (ICELANDIC) - 0x00af: 0x00ae, # REGISTERED SIGN - 0x00b0: 0x00a2, # CENT SIGN - 0x00b1: 0x00a3, # POUND SIGN - 0x00b2: 0x00a5, # YEN SIGN - 0x00b3: 0x00b7, # MIDDLE DOT - 0x00b4: 0x00a9, # COPYRIGHT SIGN - 0x00b5: 0x00a7, # SECTION SIGN - 0x00b7: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00b8: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00b9: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00ba: 0x00ac, # NOT SIGN - 0x00bb: 0x007c, # VERTICAL LINE - 0x00bc: 0x00af, # MACRON - 0x00bd: 0x00a8, # DIAERESIS - 0x00be: 0x00b4, # ACUTE ACCENT - 0x00bf: 0x00d7, # MULTIPLICATION SIGN - 0x00c0: 0x007b, # LEFT CURLY BRACKET - 0x00c1: 0x0041, # LATIN CAPITAL LETTER A - 0x00c2: 0x0042, # LATIN CAPITAL LETTER B - 0x00c3: 0x0043, # LATIN CAPITAL LETTER C - 0x00c4: 0x0044, # LATIN CAPITAL LETTER D - 0x00c5: 0x0045, # LATIN CAPITAL LETTER E - 0x00c6: 0x0046, # LATIN CAPITAL LETTER F - 0x00c7: 0x0047, # LATIN CAPITAL LETTER G - 0x00c8: 0x0048, # LATIN CAPITAL LETTER H - 0x00c9: 0x0049, # LATIN CAPITAL LETTER I - 0x00ca: 0x00ad, # SOFT HYPHEN - 0x00cb: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00cc: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00cd: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x00ce: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00cf: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00d0: 0x007d, # RIGHT CURLY BRACKET - 0x00d1: 0x004a, # LATIN CAPITAL LETTER J - 0x00d2: 0x004b, # LATIN CAPITAL LETTER K - 0x00d3: 0x004c, # LATIN CAPITAL LETTER L - 0x00d4: 0x004d, # LATIN CAPITAL LETTER M - 0x00d5: 0x004e, # LATIN CAPITAL LETTER N - 0x00d6: 0x004f, # LATIN CAPITAL LETTER O - 0x00d7: 0x0050, # LATIN CAPITAL LETTER P - 0x00d8: 0x0051, # LATIN CAPITAL LETTER Q - 0x00d9: 0x0052, # LATIN CAPITAL LETTER R - 0x00da: 0x00b9, # SUPERSCRIPT ONE - 0x00db: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00dc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00dd: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x00de: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00df: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x00e0: 0x005c, # REVERSE SOLIDUS - 0x00e1: 0x00f7, # DIVISION SIGN - 0x00e2: 0x0053, # LATIN CAPITAL LETTER S - 0x00e3: 0x0054, # LATIN CAPITAL LETTER T - 0x00e4: 0x0055, # LATIN CAPITAL LETTER U - 0x00e5: 0x0056, # LATIN CAPITAL LETTER V - 0x00e6: 0x0057, # LATIN CAPITAL LETTER W - 0x00e7: 0x0058, # LATIN CAPITAL LETTER X - 0x00e8: 0x0059, # LATIN CAPITAL LETTER Y - 0x00e9: 0x005a, # LATIN CAPITAL LETTER Z - 0x00ea: 0x00b2, # SUPERSCRIPT TWO - 0x00eb: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00ec: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00ed: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00ef: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00f0: 0x0030, # DIGIT ZERO - 0x00f1: 0x0031, # DIGIT ONE - 0x00f2: 0x0032, # DIGIT TWO - 0x00f3: 0x0033, # DIGIT THREE - 0x00f4: 0x0034, # DIGIT FOUR - 0x00f5: 0x0035, # DIGIT FIVE - 0x00f6: 0x0036, # DIGIT SIX - 0x00f7: 0x0037, # DIGIT SEVEN - 0x00f8: 0x0038, # DIGIT EIGHT - 0x00f9: 0x0039, # DIGIT NINE - 0x00fa: 0x00b3, # SUPERSCRIPT THREE - 0x00fb: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ff: 0x009f, # CONTROL + 0x0004: 0x009c, # CONTROL + 0x0005: 0x0009, # HORIZONTAL TABULATION + 0x0006: 0x0086, # CONTROL + 0x0007: 0x007f, # DELETE + 0x0008: 0x0097, # CONTROL + 0x0009: 0x008d, # CONTROL + 0x000a: 0x008e, # CONTROL + 0x0014: 0x009d, # CONTROL + 0x0015: 0x0085, # CONTROL + 0x0016: 0x0008, # BACKSPACE + 0x0017: 0x0087, # CONTROL + 0x001a: 0x0092, # CONTROL + 0x001b: 0x008f, # CONTROL + 0x0020: 0x0080, # CONTROL + 0x0021: 0x0081, # CONTROL + 0x0022: 0x0082, # CONTROL + 0x0023: 0x0083, # CONTROL + 0x0024: 0x0084, # CONTROL + 0x0025: 0x000a, # LINE FEED + 0x0026: 0x0017, # END OF TRANSMISSION BLOCK + 0x0027: 0x001b, # ESCAPE + 0x0028: 0x0088, # CONTROL + 0x0029: 0x0089, # CONTROL + 0x002a: 0x008a, # CONTROL + 0x002b: 0x008b, # CONTROL + 0x002c: 0x008c, # CONTROL + 0x002d: 0x0005, # ENQUIRY + 0x002e: 0x0006, # ACKNOWLEDGE + 0x002f: 0x0007, # BELL + 0x0030: 0x0090, # CONTROL + 0x0031: 0x0091, # CONTROL + 0x0032: 0x0016, # SYNCHRONOUS IDLE + 0x0033: 0x0093, # CONTROL + 0x0034: 0x0094, # CONTROL + 0x0035: 0x0095, # CONTROL + 0x0036: 0x0096, # CONTROL + 0x0037: 0x0004, # END OF TRANSMISSION + 0x0038: 0x0098, # CONTROL + 0x0039: 0x0099, # CONTROL + 0x003a: 0x009a, # CONTROL + 0x003b: 0x009b, # CONTROL + 0x003c: 0x0014, # DEVICE CONTROL FOUR + 0x003d: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x003e: 0x009e, # CONTROL + 0x003f: 0x001a, # SUBSTITUTE + 0x0040: 0x0020, # SPACE + 0x0041: 0x00a0, # NO-BREAK SPACE + 0x0042: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0043: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0044: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0045: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x0046: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x0047: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0048: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0049: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x004a: 0x005b, # LEFT SQUARE BRACKET + 0x004b: 0x002e, # FULL STOP + 0x004c: 0x003c, # LESS-THAN SIGN + 0x004d: 0x0028, # LEFT PARENTHESIS + 0x004e: 0x002b, # PLUS SIGN + 0x004f: 0x0021, # EXCLAMATION MARK + 0x0050: 0x0026, # AMPERSAND + 0x0051: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0052: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0053: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x0054: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x0055: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x0056: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x0057: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x0058: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x0059: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x005a: 0x005d, # RIGHT SQUARE BRACKET + 0x005b: 0x0024, # DOLLAR SIGN + 0x005c: 0x002a, # ASTERISK + 0x005d: 0x0029, # RIGHT PARENTHESIS + 0x005e: 0x003b, # SEMICOLON + 0x005f: 0x005e, # CIRCUMFLEX ACCENT + 0x0060: 0x002d, # HYPHEN-MINUS + 0x0061: 0x002f, # SOLIDUS + 0x0062: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x0063: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x0064: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x0065: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x0066: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x0067: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0068: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0069: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x006a: 0x00a6, # BROKEN BAR + 0x006b: 0x002c, # COMMA + 0x006c: 0x0025, # PERCENT SIGN + 0x006d: 0x005f, # LOW LINE + 0x006e: 0x003e, # GREATER-THAN SIGN + 0x006f: 0x003f, # QUESTION MARK + 0x0070: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x0071: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0072: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x0073: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x0074: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x0075: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x0076: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x0077: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x0078: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x0079: 0x0060, # GRAVE ACCENT + 0x007a: 0x003a, # COLON + 0x007b: 0x0023, # NUMBER SIGN + 0x007c: 0x0040, # COMMERCIAL AT + 0x007d: 0x0027, # APOSTROPHE + 0x007e: 0x003d, # EQUALS SIGN + 0x007f: 0x0022, # QUOTATION MARK + 0x0080: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x0081: 0x0061, # LATIN SMALL LETTER A + 0x0082: 0x0062, # LATIN SMALL LETTER B + 0x0083: 0x0063, # LATIN SMALL LETTER C + 0x0084: 0x0064, # LATIN SMALL LETTER D + 0x0085: 0x0065, # LATIN SMALL LETTER E + 0x0086: 0x0066, # LATIN SMALL LETTER F + 0x0087: 0x0067, # LATIN SMALL LETTER G + 0x0088: 0x0068, # LATIN SMALL LETTER H + 0x0089: 0x0069, # LATIN SMALL LETTER I + 0x008a: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x008b: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x008c: 0x00f0, # LATIN SMALL LETTER ETH (ICELANDIC) + 0x008d: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x008e: 0x00fe, # LATIN SMALL LETTER THORN (ICELANDIC) + 0x008f: 0x00b1, # PLUS-MINUS SIGN + 0x0090: 0x00b0, # DEGREE SIGN + 0x0091: 0x006a, # LATIN SMALL LETTER J + 0x0092: 0x006b, # LATIN SMALL LETTER K + 0x0093: 0x006c, # LATIN SMALL LETTER L + 0x0094: 0x006d, # LATIN SMALL LETTER M + 0x0095: 0x006e, # LATIN SMALL LETTER N + 0x0096: 0x006f, # LATIN SMALL LETTER O + 0x0097: 0x0070, # LATIN SMALL LETTER P + 0x0098: 0x0071, # LATIN SMALL LETTER Q + 0x0099: 0x0072, # LATIN SMALL LETTER R + 0x009a: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x009b: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x009c: 0x00e6, # LATIN SMALL LIGATURE AE + 0x009d: 0x00b8, # CEDILLA + 0x009e: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x009f: 0x00a4, # CURRENCY SIGN + 0x00a0: 0x00b5, # MICRO SIGN + 0x00a1: 0x007e, # TILDE + 0x00a2: 0x0073, # LATIN SMALL LETTER S + 0x00a3: 0x0074, # LATIN SMALL LETTER T + 0x00a4: 0x0075, # LATIN SMALL LETTER U + 0x00a5: 0x0076, # LATIN SMALL LETTER V + 0x00a6: 0x0077, # LATIN SMALL LETTER W + 0x00a7: 0x0078, # LATIN SMALL LETTER X + 0x00a8: 0x0079, # LATIN SMALL LETTER Y + 0x00a9: 0x007a, # LATIN SMALL LETTER Z + 0x00aa: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ab: 0x00bf, # INVERTED QUESTION MARK + 0x00ac: 0x00d0, # LATIN CAPITAL LETTER ETH (ICELANDIC) + 0x00ad: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00ae: 0x00de, # LATIN CAPITAL LETTER THORN (ICELANDIC) + 0x00af: 0x00ae, # REGISTERED SIGN + 0x00b0: 0x00a2, # CENT SIGN + 0x00b1: 0x00a3, # POUND SIGN + 0x00b2: 0x00a5, # YEN SIGN + 0x00b3: 0x00b7, # MIDDLE DOT + 0x00b4: 0x00a9, # COPYRIGHT SIGN + 0x00b5: 0x00a7, # SECTION SIGN + 0x00b7: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00b8: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00b9: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00ba: 0x00ac, # NOT SIGN + 0x00bb: 0x007c, # VERTICAL LINE + 0x00bc: 0x00af, # MACRON + 0x00bd: 0x00a8, # DIAERESIS + 0x00be: 0x00b4, # ACUTE ACCENT + 0x00bf: 0x00d7, # MULTIPLICATION SIGN + 0x00c0: 0x007b, # LEFT CURLY BRACKET + 0x00c1: 0x0041, # LATIN CAPITAL LETTER A + 0x00c2: 0x0042, # LATIN CAPITAL LETTER B + 0x00c3: 0x0043, # LATIN CAPITAL LETTER C + 0x00c4: 0x0044, # LATIN CAPITAL LETTER D + 0x00c5: 0x0045, # LATIN CAPITAL LETTER E + 0x00c6: 0x0046, # LATIN CAPITAL LETTER F + 0x00c7: 0x0047, # LATIN CAPITAL LETTER G + 0x00c8: 0x0048, # LATIN CAPITAL LETTER H + 0x00c9: 0x0049, # LATIN CAPITAL LETTER I + 0x00ca: 0x00ad, # SOFT HYPHEN + 0x00cb: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00cc: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00cd: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x00ce: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00cf: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00d0: 0x007d, # RIGHT CURLY BRACKET + 0x00d1: 0x004a, # LATIN CAPITAL LETTER J + 0x00d2: 0x004b, # LATIN CAPITAL LETTER K + 0x00d3: 0x004c, # LATIN CAPITAL LETTER L + 0x00d4: 0x004d, # LATIN CAPITAL LETTER M + 0x00d5: 0x004e, # LATIN CAPITAL LETTER N + 0x00d6: 0x004f, # LATIN CAPITAL LETTER O + 0x00d7: 0x0050, # LATIN CAPITAL LETTER P + 0x00d8: 0x0051, # LATIN CAPITAL LETTER Q + 0x00d9: 0x0052, # LATIN CAPITAL LETTER R + 0x00da: 0x00b9, # SUPERSCRIPT ONE + 0x00db: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00dd: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x00de: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00df: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x00e0: 0x005c, # REVERSE SOLIDUS + 0x00e1: 0x00f7, # DIVISION SIGN + 0x00e2: 0x0053, # LATIN CAPITAL LETTER S + 0x00e3: 0x0054, # LATIN CAPITAL LETTER T + 0x00e4: 0x0055, # LATIN CAPITAL LETTER U + 0x00e5: 0x0056, # LATIN CAPITAL LETTER V + 0x00e6: 0x0057, # LATIN CAPITAL LETTER W + 0x00e7: 0x0058, # LATIN CAPITAL LETTER X + 0x00e8: 0x0059, # LATIN CAPITAL LETTER Y + 0x00e9: 0x005a, # LATIN CAPITAL LETTER Z + 0x00ea: 0x00b2, # SUPERSCRIPT TWO + 0x00eb: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00ec: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00ed: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00ef: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00f0: 0x0030, # DIGIT ZERO + 0x00f1: 0x0031, # DIGIT ONE + 0x00f2: 0x0032, # DIGIT TWO + 0x00f3: 0x0033, # DIGIT THREE + 0x00f4: 0x0034, # DIGIT FOUR + 0x00f5: 0x0035, # DIGIT FIVE + 0x00f6: 0x0036, # DIGIT SIX + 0x00f7: 0x0037, # DIGIT SEVEN + 0x00f8: 0x0038, # DIGIT EIGHT + 0x00f9: 0x0039, # DIGIT NINE + 0x00fa: 0x00b3, # SUPERSCRIPT THREE + 0x00fb: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00ff: 0x009f, # CONTROL }) ### Encoding Map diff --git a/Lib/encodings/cp737.py b/Lib/encodings/cp737.py index 6174d85..a4729b1 100644 --- a/Lib/encodings/cp737.py +++ b/Lib/encodings/cp737.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,134 +37,134 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA - 0x0081: 0x0392, # GREEK CAPITAL LETTER BETA - 0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x0083: 0x0394, # GREEK CAPITAL LETTER DELTA - 0x0084: 0x0395, # GREEK CAPITAL LETTER EPSILON - 0x0085: 0x0396, # GREEK CAPITAL LETTER ZETA - 0x0086: 0x0397, # GREEK CAPITAL LETTER ETA - 0x0087: 0x0398, # GREEK CAPITAL LETTER THETA - 0x0088: 0x0399, # GREEK CAPITAL LETTER IOTA - 0x0089: 0x039a, # GREEK CAPITAL LETTER KAPPA - 0x008a: 0x039b, # GREEK CAPITAL LETTER LAMDA - 0x008b: 0x039c, # GREEK CAPITAL LETTER MU - 0x008c: 0x039d, # GREEK CAPITAL LETTER NU - 0x008d: 0x039e, # GREEK CAPITAL LETTER XI - 0x008e: 0x039f, # GREEK CAPITAL LETTER OMICRON - 0x008f: 0x03a0, # GREEK CAPITAL LETTER PI - 0x0090: 0x03a1, # GREEK CAPITAL LETTER RHO - 0x0091: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x0092: 0x03a4, # GREEK CAPITAL LETTER TAU - 0x0093: 0x03a5, # GREEK CAPITAL LETTER UPSILON - 0x0094: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x0095: 0x03a7, # GREEK CAPITAL LETTER CHI - 0x0096: 0x03a8, # GREEK CAPITAL LETTER PSI - 0x0097: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x0098: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x0099: 0x03b2, # GREEK SMALL LETTER BETA - 0x009a: 0x03b3, # GREEK SMALL LETTER GAMMA - 0x009b: 0x03b4, # GREEK SMALL LETTER DELTA - 0x009c: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x009d: 0x03b6, # GREEK SMALL LETTER ZETA - 0x009e: 0x03b7, # GREEK SMALL LETTER ETA - 0x009f: 0x03b8, # GREEK SMALL LETTER THETA - 0x00a0: 0x03b9, # GREEK SMALL LETTER IOTA - 0x00a1: 0x03ba, # GREEK SMALL LETTER KAPPA - 0x00a2: 0x03bb, # GREEK SMALL LETTER LAMDA - 0x00a3: 0x03bc, # GREEK SMALL LETTER MU - 0x00a4: 0x03bd, # GREEK SMALL LETTER NU - 0x00a5: 0x03be, # GREEK SMALL LETTER XI - 0x00a6: 0x03bf, # GREEK SMALL LETTER OMICRON - 0x00a7: 0x03c0, # GREEK SMALL LETTER PI - 0x00a8: 0x03c1, # GREEK SMALL LETTER RHO - 0x00a9: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00aa: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA - 0x00ab: 0x03c4, # GREEK SMALL LETTER TAU - 0x00ac: 0x03c5, # GREEK SMALL LETTER UPSILON - 0x00ad: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ae: 0x03c7, # GREEK SMALL LETTER CHI - 0x00af: 0x03c8, # GREEK SMALL LETTER PSI - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03c9, # GREEK SMALL LETTER OMEGA - 0x00e1: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x00e2: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x00e3: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS - 0x00e4: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x00e5: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS - 0x00e6: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x00e7: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x00e8: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x00e9: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x00ea: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x00eb: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x00ec: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x00ed: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x00ee: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x00ef: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x00f0: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x00f5: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA + 0x0081: 0x0392, # GREEK CAPITAL LETTER BETA + 0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x0083: 0x0394, # GREEK CAPITAL LETTER DELTA + 0x0084: 0x0395, # GREEK CAPITAL LETTER EPSILON + 0x0085: 0x0396, # GREEK CAPITAL LETTER ZETA + 0x0086: 0x0397, # GREEK CAPITAL LETTER ETA + 0x0087: 0x0398, # GREEK CAPITAL LETTER THETA + 0x0088: 0x0399, # GREEK CAPITAL LETTER IOTA + 0x0089: 0x039a, # GREEK CAPITAL LETTER KAPPA + 0x008a: 0x039b, # GREEK CAPITAL LETTER LAMDA + 0x008b: 0x039c, # GREEK CAPITAL LETTER MU + 0x008c: 0x039d, # GREEK CAPITAL LETTER NU + 0x008d: 0x039e, # GREEK CAPITAL LETTER XI + 0x008e: 0x039f, # GREEK CAPITAL LETTER OMICRON + 0x008f: 0x03a0, # GREEK CAPITAL LETTER PI + 0x0090: 0x03a1, # GREEK CAPITAL LETTER RHO + 0x0091: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x0092: 0x03a4, # GREEK CAPITAL LETTER TAU + 0x0093: 0x03a5, # GREEK CAPITAL LETTER UPSILON + 0x0094: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x0095: 0x03a7, # GREEK CAPITAL LETTER CHI + 0x0096: 0x03a8, # GREEK CAPITAL LETTER PSI + 0x0097: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x0098: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x0099: 0x03b2, # GREEK SMALL LETTER BETA + 0x009a: 0x03b3, # GREEK SMALL LETTER GAMMA + 0x009b: 0x03b4, # GREEK SMALL LETTER DELTA + 0x009c: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x009d: 0x03b6, # GREEK SMALL LETTER ZETA + 0x009e: 0x03b7, # GREEK SMALL LETTER ETA + 0x009f: 0x03b8, # GREEK SMALL LETTER THETA + 0x00a0: 0x03b9, # GREEK SMALL LETTER IOTA + 0x00a1: 0x03ba, # GREEK SMALL LETTER KAPPA + 0x00a2: 0x03bb, # GREEK SMALL LETTER LAMDA + 0x00a3: 0x03bc, # GREEK SMALL LETTER MU + 0x00a4: 0x03bd, # GREEK SMALL LETTER NU + 0x00a5: 0x03be, # GREEK SMALL LETTER XI + 0x00a6: 0x03bf, # GREEK SMALL LETTER OMICRON + 0x00a7: 0x03c0, # GREEK SMALL LETTER PI + 0x00a8: 0x03c1, # GREEK SMALL LETTER RHO + 0x00a9: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00aa: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA + 0x00ab: 0x03c4, # GREEK SMALL LETTER TAU + 0x00ac: 0x03c5, # GREEK SMALL LETTER UPSILON + 0x00ad: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ae: 0x03c7, # GREEK SMALL LETTER CHI + 0x00af: 0x03c8, # GREEK SMALL LETTER PSI + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03c9, # GREEK SMALL LETTER OMEGA + 0x00e1: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x00e2: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x00e3: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS + 0x00e4: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x00e5: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS + 0x00e6: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x00e7: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x00e8: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x00e9: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x00ea: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x00eb: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x00ec: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x00ed: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x00ee: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x00ef: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x00f0: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x00f5: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Encoding Map diff --git a/Lib/encodings/cp775.py b/Lib/encodings/cp775.py index ee56f01..d8cda72 100644 --- a/Lib/encodings/cp775.py +++ b/Lib/encodings/cp775.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,134 +37,134 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x0101, # LATIN SMALL LETTER A WITH MACRON - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x0089: 0x0113, # LATIN SMALL LETTER E WITH MACRON - 0x008a: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x008b: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA - 0x008c: 0x012b, # LATIN SMALL LETTER I WITH MACRON - 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x014d, # LATIN SMALL LETTER O WITH MACRON - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x0096: 0x00a2, # CENT SIGN - 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x00d7, # MULTIPLICATION SIGN - 0x009f: 0x00a4, # CURRENCY SIGN - 0x00a0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON - 0x00a1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00a4: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00a5: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00a6: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00a7: 0x00a6, # BROKEN BAR - 0x00a8: 0x00a9, # COPYRIGHT SIGN - 0x00a9: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00b6: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00b7: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00b8: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK - 0x00be: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK - 0x00c7: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00d0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00d1: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00d2: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00d3: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x00d4: 0x012f, # LATIN SMALL LETTER I WITH OGONEK - 0x00d5: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00d6: 0x0173, # LATIN SMALL LETTER U WITH OGONEK - 0x00d7: 0x016b, # LATIN SMALL LETTER U WITH MACRON - 0x00d8: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00e2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON - 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00e8: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x00e9: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA - 0x00ea: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x00eb: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA - 0x00ec: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA - 0x00ed: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON - 0x00ee: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x00ef: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x00b9, # SUPERSCRIPT ONE - 0x00fc: 0x00b3, # SUPERSCRIPT THREE - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x0101, # LATIN SMALL LETTER A WITH MACRON + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x0089: 0x0113, # LATIN SMALL LETTER E WITH MACRON + 0x008a: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x008b: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA + 0x008c: 0x012b, # LATIN SMALL LETTER I WITH MACRON + 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x014d, # LATIN SMALL LETTER O WITH MACRON + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0096: 0x00a2, # CENT SIGN + 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x00d7, # MULTIPLICATION SIGN + 0x009f: 0x00a4, # CURRENCY SIGN + 0x00a0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON + 0x00a1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00a4: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00a5: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x00a6: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00a7: 0x00a6, # BROKEN BAR + 0x00a8: 0x00a9, # COPYRIGHT SIGN + 0x00a9: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00b6: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00b7: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00b8: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK + 0x00be: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK + 0x00c7: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00d0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00d1: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00d2: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00d3: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x00d4: 0x012f, # LATIN SMALL LETTER I WITH OGONEK + 0x00d5: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00d6: 0x0173, # LATIN SMALL LETTER U WITH OGONEK + 0x00d7: 0x016b, # LATIN SMALL LETTER U WITH MACRON + 0x00d8: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00e2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON + 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00e8: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x00e9: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA + 0x00ea: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x00eb: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA + 0x00ec: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA + 0x00ed: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON + 0x00ee: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x00ef: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00f4: 0x00b6, # PILCROW SIGN + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x00b9, # SUPERSCRIPT ONE + 0x00fc: 0x00b3, # SUPERSCRIPT THREE + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Encoding Map diff --git a/Lib/encodings/cp850.py b/Lib/encodings/cp850.py index dd23187..ae09839 100644 --- a/Lib/encodings/cp850.py +++ b/Lib/encodings/cp850.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,134 +37,134 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x00d7, # MULTIPLICATION SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00b8: 0x00a9, # COPYRIGHT SIGN - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x00a2, # CENT SIGN - 0x00be: 0x00a5, # YEN SIGN - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x00f0, # LATIN SMALL LETTER ETH - 0x00d1: 0x00d0, # LATIN CAPITAL LETTER ETH - 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00d5: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x00a6, # BROKEN BAR - 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x00fe, # LATIN SMALL LETTER THORN - 0x00e8: 0x00de, # LATIN CAPITAL LETTER THORN - 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00ee: 0x00af, # MACRON - 0x00ef: 0x00b4, # ACUTE ACCENT - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2017, # DOUBLE LOW LINE - 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x00b8, # CEDILLA - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x00b9, # SUPERSCRIPT ONE - 0x00fc: 0x00b3, # SUPERSCRIPT THREE - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x00d7, # MULTIPLICATION SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00b8: 0x00a9, # COPYRIGHT SIGN + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x00a2, # CENT SIGN + 0x00be: 0x00a5, # YEN SIGN + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x00a4, # CURRENCY SIGN + 0x00d0: 0x00f0, # LATIN SMALL LETTER ETH + 0x00d1: 0x00d0, # LATIN CAPITAL LETTER ETH + 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00d5: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x00a6, # BROKEN BAR + 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x00fe, # LATIN SMALL LETTER THORN + 0x00e8: 0x00de, # LATIN CAPITAL LETTER THORN + 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00ee: 0x00af, # MACRON + 0x00ef: 0x00b4, # ACUTE ACCENT + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2017, # DOUBLE LOW LINE + 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00f4: 0x00b6, # PILCROW SIGN + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x00b8, # CEDILLA + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x00a8, # DIAERESIS + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x00b9, # SUPERSCRIPT ONE + 0x00fc: 0x00b3, # SUPERSCRIPT THREE + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Encoding Map diff --git a/Lib/encodings/cp852.py b/Lib/encodings/cp852.py index 38df00f..dad5d29 100644 --- a/Lib/encodings/cp852.py +++ b/Lib/encodings/cp852.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,134 +37,134 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE - 0x0086: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x008b: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE - 0x0092: 0x013a, # LATIN SMALL LETTER L WITH ACUTE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x013d, # LATIN CAPITAL LETTER L WITH CARON - 0x0096: 0x013e, # LATIN SMALL LETTER L WITH CARON - 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x0164, # LATIN CAPITAL LETTER T WITH CARON - 0x009c: 0x0165, # LATIN SMALL LETTER T WITH CARON - 0x009d: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x009e: 0x00d7, # MULTIPLICATION SIGN - 0x009f: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00a5: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00a6: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00a7: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00a8: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00a9: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00ac: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00ad: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00b7: 0x011a, # LATIN CAPITAL LETTER E WITH CARON - 0x00b8: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00be: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE - 0x00c7: 0x0103, # LATIN SMALL LETTER A WITH BREVE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x0111, # LATIN SMALL LETTER D WITH STROKE - 0x00d1: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE - 0x00d2: 0x010e, # LATIN CAPITAL LETTER D WITH CARON - 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00d4: 0x010f, # LATIN SMALL LETTER D WITH CARON - 0x00d5: 0x0147, # LATIN CAPITAL LETTER N WITH CARON - 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d8: 0x011b, # LATIN SMALL LETTER E WITH CARON - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA - 0x00de: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00e4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00e5: 0x0148, # LATIN SMALL LETTER N WITH CARON - 0x00e6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00e7: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00e8: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE - 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ea: 0x0155, # LATIN SMALL LETTER R WITH ACUTE - 0x00eb: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00ee: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA - 0x00ef: 0x00b4, # ACUTE ACCENT - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x02dd, # DOUBLE ACUTE ACCENT - 0x00f2: 0x02db, # OGONEK - 0x00f3: 0x02c7, # CARON - 0x00f4: 0x02d8, # BREVE - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x00b8, # CEDILLA - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x02d9, # DOT ABOVE - 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x00fc: 0x0158, # LATIN CAPITAL LETTER R WITH CARON - 0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE + 0x0086: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x008b: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE + 0x0092: 0x013a, # LATIN SMALL LETTER L WITH ACUTE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x013d, # LATIN CAPITAL LETTER L WITH CARON + 0x0096: 0x013e, # LATIN SMALL LETTER L WITH CARON + 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x0164, # LATIN CAPITAL LETTER T WITH CARON + 0x009c: 0x0165, # LATIN SMALL LETTER T WITH CARON + 0x009d: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE + 0x009e: 0x00d7, # MULTIPLICATION SIGN + 0x009f: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00a5: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00a6: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00a7: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00a8: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00a9: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x00ac: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00ad: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00b7: 0x011a, # LATIN CAPITAL LETTER E WITH CARON + 0x00b8: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00be: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE + 0x00c7: 0x0103, # LATIN SMALL LETTER A WITH BREVE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x00a4, # CURRENCY SIGN + 0x00d0: 0x0111, # LATIN SMALL LETTER D WITH STROKE + 0x00d1: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE + 0x00d2: 0x010e, # LATIN CAPITAL LETTER D WITH CARON + 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00d4: 0x010f, # LATIN SMALL LETTER D WITH CARON + 0x00d5: 0x0147, # LATIN CAPITAL LETTER N WITH CARON + 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d8: 0x011b, # LATIN SMALL LETTER E WITH CARON + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA + 0x00de: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00e4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00e5: 0x0148, # LATIN SMALL LETTER N WITH CARON + 0x00e6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00e7: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00e8: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE + 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00ea: 0x0155, # LATIN SMALL LETTER R WITH ACUTE + 0x00eb: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00ee: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA + 0x00ef: 0x00b4, # ACUTE ACCENT + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x02dd, # DOUBLE ACUTE ACCENT + 0x00f2: 0x02db, # OGONEK + 0x00f3: 0x02c7, # CARON + 0x00f4: 0x02d8, # BREVE + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x00b8, # CEDILLA + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x00a8, # DIAERESIS + 0x00fa: 0x02d9, # DOT ABOVE + 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x00fc: 0x0158, # LATIN CAPITAL LETTER R WITH CARON + 0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Encoding Map diff --git a/Lib/encodings/cp855.py b/Lib/encodings/cp855.py index 764fe1a..d93c5e8 100644 --- a/Lib/encodings/cp855.py +++ b/Lib/encodings/cp855.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,134 +37,134 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE - 0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE - 0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE - 0x0083: 0x0403, # CYRILLIC CAPITAL LETTER GJE - 0x0084: 0x0451, # CYRILLIC SMALL LETTER IO - 0x0085: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x0086: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0087: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x0088: 0x0455, # CYRILLIC SMALL LETTER DZE - 0x0089: 0x0405, # CYRILLIC CAPITAL LETTER DZE - 0x008a: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x008b: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x008c: 0x0457, # CYRILLIC SMALL LETTER YI - 0x008d: 0x0407, # CYRILLIC CAPITAL LETTER YI - 0x008e: 0x0458, # CYRILLIC SMALL LETTER JE - 0x008f: 0x0408, # CYRILLIC CAPITAL LETTER JE - 0x0090: 0x0459, # CYRILLIC SMALL LETTER LJE - 0x0091: 0x0409, # CYRILLIC CAPITAL LETTER LJE - 0x0092: 0x045a, # CYRILLIC SMALL LETTER NJE - 0x0093: 0x040a, # CYRILLIC CAPITAL LETTER NJE - 0x0094: 0x045b, # CYRILLIC SMALL LETTER TSHE - 0x0095: 0x040b, # CYRILLIC CAPITAL LETTER TSHE - 0x0096: 0x045c, # CYRILLIC SMALL LETTER KJE - 0x0097: 0x040c, # CYRILLIC CAPITAL LETTER KJE - 0x0098: 0x045e, # CYRILLIC SMALL LETTER SHORT U - 0x0099: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U - 0x009a: 0x045f, # CYRILLIC SMALL LETTER DZHE - 0x009b: 0x040f, # CYRILLIC CAPITAL LETTER DZHE - 0x009c: 0x044e, # CYRILLIC SMALL LETTER YU - 0x009d: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x009e: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x009f: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A - 0x00a1: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x00a2: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00a3: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x00a4: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00a5: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x00a6: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00a7: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x00a8: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00a9: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x00aa: 0x0444, # CYRILLIC SMALL LETTER EF - 0x00ab: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x00ac: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00ad: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00b6: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x00b7: 0x0438, # CYRILLIC SMALL LETTER I - 0x00b8: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00be: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00c7: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00d1: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x00d2: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00d3: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x00d4: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00d5: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x00d6: 0x043e, # CYRILLIC SMALL LETTER O - 0x00d7: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x00d8: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x00de: 0x044f, # CYRILLIC SMALL LETTER YA - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00e1: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00e2: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x00e3: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00e4: 0x0421, # CYRILLIC CAPITAL LETTER ES - 0x00e5: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00e6: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x00e7: 0x0443, # CYRILLIC SMALL LETTER U - 0x00e8: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x00e9: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00ea: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x00eb: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00ec: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x00ed: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00ee: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x00ef: 0x2116, # NUMERO SIGN - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00f2: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x00f3: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00f4: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x00f5: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00f6: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x00f7: 0x044d, # CYRILLIC SMALL LETTER E - 0x00f8: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00fa: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x00fb: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00fc: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x00fd: 0x00a7, # SECTION SIGN - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE + 0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE + 0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE + 0x0083: 0x0403, # CYRILLIC CAPITAL LETTER GJE + 0x0084: 0x0451, # CYRILLIC SMALL LETTER IO + 0x0085: 0x0401, # CYRILLIC CAPITAL LETTER IO + 0x0086: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0087: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0088: 0x0455, # CYRILLIC SMALL LETTER DZE + 0x0089: 0x0405, # CYRILLIC CAPITAL LETTER DZE + 0x008a: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x008b: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x008c: 0x0457, # CYRILLIC SMALL LETTER YI + 0x008d: 0x0407, # CYRILLIC CAPITAL LETTER YI + 0x008e: 0x0458, # CYRILLIC SMALL LETTER JE + 0x008f: 0x0408, # CYRILLIC CAPITAL LETTER JE + 0x0090: 0x0459, # CYRILLIC SMALL LETTER LJE + 0x0091: 0x0409, # CYRILLIC CAPITAL LETTER LJE + 0x0092: 0x045a, # CYRILLIC SMALL LETTER NJE + 0x0093: 0x040a, # CYRILLIC CAPITAL LETTER NJE + 0x0094: 0x045b, # CYRILLIC SMALL LETTER TSHE + 0x0095: 0x040b, # CYRILLIC CAPITAL LETTER TSHE + 0x0096: 0x045c, # CYRILLIC SMALL LETTER KJE + 0x0097: 0x040c, # CYRILLIC CAPITAL LETTER KJE + 0x0098: 0x045e, # CYRILLIC SMALL LETTER SHORT U + 0x0099: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U + 0x009a: 0x045f, # CYRILLIC SMALL LETTER DZHE + 0x009b: 0x040f, # CYRILLIC CAPITAL LETTER DZHE + 0x009c: 0x044e, # CYRILLIC SMALL LETTER YU + 0x009d: 0x042e, # CYRILLIC CAPITAL LETTER YU + 0x009e: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN + 0x009f: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A + 0x00a1: 0x0410, # CYRILLIC CAPITAL LETTER A + 0x00a2: 0x0431, # CYRILLIC SMALL LETTER BE + 0x00a3: 0x0411, # CYRILLIC CAPITAL LETTER BE + 0x00a4: 0x0446, # CYRILLIC SMALL LETTER TSE + 0x00a5: 0x0426, # CYRILLIC CAPITAL LETTER TSE + 0x00a6: 0x0434, # CYRILLIC SMALL LETTER DE + 0x00a7: 0x0414, # CYRILLIC CAPITAL LETTER DE + 0x00a8: 0x0435, # CYRILLIC SMALL LETTER IE + 0x00a9: 0x0415, # CYRILLIC CAPITAL LETTER IE + 0x00aa: 0x0444, # CYRILLIC SMALL LETTER EF + 0x00ab: 0x0424, # CYRILLIC CAPITAL LETTER EF + 0x00ac: 0x0433, # CYRILLIC SMALL LETTER GHE + 0x00ad: 0x0413, # CYRILLIC CAPITAL LETTER GHE + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x0445, # CYRILLIC SMALL LETTER HA + 0x00b6: 0x0425, # CYRILLIC CAPITAL LETTER HA + 0x00b7: 0x0438, # CYRILLIC SMALL LETTER I + 0x00b8: 0x0418, # CYRILLIC CAPITAL LETTER I + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x0439, # CYRILLIC SMALL LETTER SHORT I + 0x00be: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x043a, # CYRILLIC SMALL LETTER KA + 0x00c7: 0x041a, # CYRILLIC CAPITAL LETTER KA + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x00a4, # CURRENCY SIGN + 0x00d0: 0x043b, # CYRILLIC SMALL LETTER EL + 0x00d1: 0x041b, # CYRILLIC CAPITAL LETTER EL + 0x00d2: 0x043c, # CYRILLIC SMALL LETTER EM + 0x00d3: 0x041c, # CYRILLIC CAPITAL LETTER EM + 0x00d4: 0x043d, # CYRILLIC SMALL LETTER EN + 0x00d5: 0x041d, # CYRILLIC CAPITAL LETTER EN + 0x00d6: 0x043e, # CYRILLIC SMALL LETTER O + 0x00d7: 0x041e, # CYRILLIC CAPITAL LETTER O + 0x00d8: 0x043f, # CYRILLIC SMALL LETTER PE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x041f, # CYRILLIC CAPITAL LETTER PE + 0x00de: 0x044f, # CYRILLIC SMALL LETTER YA + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x042f, # CYRILLIC CAPITAL LETTER YA + 0x00e1: 0x0440, # CYRILLIC SMALL LETTER ER + 0x00e2: 0x0420, # CYRILLIC CAPITAL LETTER ER + 0x00e3: 0x0441, # CYRILLIC SMALL LETTER ES + 0x00e4: 0x0421, # CYRILLIC CAPITAL LETTER ES + 0x00e5: 0x0442, # CYRILLIC SMALL LETTER TE + 0x00e6: 0x0422, # CYRILLIC CAPITAL LETTER TE + 0x00e7: 0x0443, # CYRILLIC SMALL LETTER U + 0x00e8: 0x0423, # CYRILLIC CAPITAL LETTER U + 0x00e9: 0x0436, # CYRILLIC SMALL LETTER ZHE + 0x00ea: 0x0416, # CYRILLIC CAPITAL LETTER ZHE + 0x00eb: 0x0432, # CYRILLIC SMALL LETTER VE + 0x00ec: 0x0412, # CYRILLIC CAPITAL LETTER VE + 0x00ed: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN + 0x00ee: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x00ef: 0x2116, # NUMERO SIGN + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x044b, # CYRILLIC SMALL LETTER YERU + 0x00f2: 0x042b, # CYRILLIC CAPITAL LETTER YERU + 0x00f3: 0x0437, # CYRILLIC SMALL LETTER ZE + 0x00f4: 0x0417, # CYRILLIC CAPITAL LETTER ZE + 0x00f5: 0x0448, # CYRILLIC SMALL LETTER SHA + 0x00f6: 0x0428, # CYRILLIC CAPITAL LETTER SHA + 0x00f7: 0x044d, # CYRILLIC SMALL LETTER E + 0x00f8: 0x042d, # CYRILLIC CAPITAL LETTER E + 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA + 0x00fa: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA + 0x00fb: 0x0447, # CYRILLIC SMALL LETTER CHE + 0x00fc: 0x0427, # CYRILLIC CAPITAL LETTER CHE + 0x00fd: 0x00a7, # SECTION SIGN + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Encoding Map diff --git a/Lib/encodings/cp856.py b/Lib/encodings/cp856.py index 9d09c09..1bf67f0 100644 --- a/Lib/encodings/cp856.py +++ b/Lib/encodings/cp856.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,134 +37,134 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x05d0, # HEBREW LETTER ALEF - 0x0081: 0x05d1, # HEBREW LETTER BET - 0x0082: 0x05d2, # HEBREW LETTER GIMEL - 0x0083: 0x05d3, # HEBREW LETTER DALET - 0x0084: 0x05d4, # HEBREW LETTER HE - 0x0085: 0x05d5, # HEBREW LETTER VAV - 0x0086: 0x05d6, # HEBREW LETTER ZAYIN - 0x0087: 0x05d7, # HEBREW LETTER HET - 0x0088: 0x05d8, # HEBREW LETTER TET - 0x0089: 0x05d9, # HEBREW LETTER YOD - 0x008a: 0x05da, # HEBREW LETTER FINAL KAF - 0x008b: 0x05db, # HEBREW LETTER KAF - 0x008c: 0x05dc, # HEBREW LETTER LAMED - 0x008d: 0x05dd, # HEBREW LETTER FINAL MEM - 0x008e: 0x05de, # HEBREW LETTER MEM - 0x008f: 0x05df, # HEBREW LETTER FINAL NUN - 0x0090: 0x05e0, # HEBREW LETTER NUN - 0x0091: 0x05e1, # HEBREW LETTER SAMEKH - 0x0092: 0x05e2, # HEBREW LETTER AYIN - 0x0093: 0x05e3, # HEBREW LETTER FINAL PE - 0x0094: 0x05e4, # HEBREW LETTER PE - 0x0095: 0x05e5, # HEBREW LETTER FINAL TSADI - 0x0096: 0x05e6, # HEBREW LETTER TSADI - 0x0097: 0x05e7, # HEBREW LETTER QOF - 0x0098: 0x05e8, # HEBREW LETTER RESH - 0x0099: 0x05e9, # HEBREW LETTER SHIN - 0x009a: 0x05ea, # HEBREW LETTER TAV - 0x009b: None, # UNDEFINED - 0x009c: 0x00a3, # POUND SIGN - 0x009d: None, # UNDEFINED - 0x009e: 0x00d7, # MULTIPLICATION SIGN - 0x009f: None, # UNDEFINED - 0x00a0: None, # UNDEFINED - 0x00a1: None, # UNDEFINED - 0x00a2: None, # UNDEFINED - 0x00a3: None, # UNDEFINED - 0x00a4: None, # UNDEFINED - 0x00a5: None, # UNDEFINED - 0x00a6: None, # UNDEFINED - 0x00a7: None, # UNDEFINED - 0x00a8: None, # UNDEFINED - 0x00a9: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: None, # UNDEFINED - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: None, # UNDEFINED - 0x00b6: None, # UNDEFINED - 0x00b7: None, # UNDEFINED - 0x00b8: 0x00a9, # COPYRIGHT SIGN - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x00a2, # CENT SIGN - 0x00be: 0x00a5, # YEN SIGN - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: None, # UNDEFINED - 0x00c7: None, # UNDEFINED - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: None, # UNDEFINED - 0x00d1: None, # UNDEFINED - 0x00d2: None, # UNDEFINED - 0x00d3: None, # UNDEFINEDS - 0x00d4: None, # UNDEFINED - 0x00d5: None, # UNDEFINED - 0x00d6: None, # UNDEFINEDE - 0x00d7: None, # UNDEFINED - 0x00d8: None, # UNDEFINED - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x00a6, # BROKEN BAR - 0x00de: None, # UNDEFINED - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: None, # UNDEFINED - 0x00e1: None, # UNDEFINED - 0x00e2: None, # UNDEFINED - 0x00e3: None, # UNDEFINED - 0x00e4: None, # UNDEFINED - 0x00e5: None, # UNDEFINED - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: None, # UNDEFINED - 0x00e8: None, # UNDEFINED - 0x00e9: None, # UNDEFINED - 0x00ea: None, # UNDEFINED - 0x00eb: None, # UNDEFINED - 0x00ec: None, # UNDEFINED - 0x00ed: None, # UNDEFINED - 0x00ee: 0x00af, # MACRON - 0x00ef: 0x00b4, # ACUTE ACCENT - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2017, # DOUBLE LOW LINE - 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x00b8, # CEDILLA - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x00b9, # SUPERSCRIPT ONE - 0x00fc: 0x00b3, # SUPERSCRIPT THREE - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x05d0, # HEBREW LETTER ALEF + 0x0081: 0x05d1, # HEBREW LETTER BET + 0x0082: 0x05d2, # HEBREW LETTER GIMEL + 0x0083: 0x05d3, # HEBREW LETTER DALET + 0x0084: 0x05d4, # HEBREW LETTER HE + 0x0085: 0x05d5, # HEBREW LETTER VAV + 0x0086: 0x05d6, # HEBREW LETTER ZAYIN + 0x0087: 0x05d7, # HEBREW LETTER HET + 0x0088: 0x05d8, # HEBREW LETTER TET + 0x0089: 0x05d9, # HEBREW LETTER YOD + 0x008a: 0x05da, # HEBREW LETTER FINAL KAF + 0x008b: 0x05db, # HEBREW LETTER KAF + 0x008c: 0x05dc, # HEBREW LETTER LAMED + 0x008d: 0x05dd, # HEBREW LETTER FINAL MEM + 0x008e: 0x05de, # HEBREW LETTER MEM + 0x008f: 0x05df, # HEBREW LETTER FINAL NUN + 0x0090: 0x05e0, # HEBREW LETTER NUN + 0x0091: 0x05e1, # HEBREW LETTER SAMEKH + 0x0092: 0x05e2, # HEBREW LETTER AYIN + 0x0093: 0x05e3, # HEBREW LETTER FINAL PE + 0x0094: 0x05e4, # HEBREW LETTER PE + 0x0095: 0x05e5, # HEBREW LETTER FINAL TSADI + 0x0096: 0x05e6, # HEBREW LETTER TSADI + 0x0097: 0x05e7, # HEBREW LETTER QOF + 0x0098: 0x05e8, # HEBREW LETTER RESH + 0x0099: 0x05e9, # HEBREW LETTER SHIN + 0x009a: 0x05ea, # HEBREW LETTER TAV + 0x009b: None, # UNDEFINED + 0x009c: 0x00a3, # POUND SIGN + 0x009d: None, # UNDEFINED + 0x009e: 0x00d7, # MULTIPLICATION SIGN + 0x009f: None, # UNDEFINED + 0x00a0: None, # UNDEFINED + 0x00a1: None, # UNDEFINED + 0x00a2: None, # UNDEFINED + 0x00a3: None, # UNDEFINED + 0x00a4: None, # UNDEFINED + 0x00a5: None, # UNDEFINED + 0x00a6: None, # UNDEFINED + 0x00a7: None, # UNDEFINED + 0x00a8: None, # UNDEFINED + 0x00a9: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: None, # UNDEFINED + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: None, # UNDEFINED + 0x00b6: None, # UNDEFINED + 0x00b7: None, # UNDEFINED + 0x00b8: 0x00a9, # COPYRIGHT SIGN + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x00a2, # CENT SIGN + 0x00be: 0x00a5, # YEN SIGN + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: None, # UNDEFINED + 0x00c7: None, # UNDEFINED + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x00a4, # CURRENCY SIGN + 0x00d0: None, # UNDEFINED + 0x00d1: None, # UNDEFINED + 0x00d2: None, # UNDEFINED + 0x00d3: None, # UNDEFINEDS + 0x00d4: None, # UNDEFINED + 0x00d5: None, # UNDEFINED + 0x00d6: None, # UNDEFINEDE + 0x00d7: None, # UNDEFINED + 0x00d8: None, # UNDEFINED + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x00a6, # BROKEN BAR + 0x00de: None, # UNDEFINED + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: None, # UNDEFINED + 0x00e1: None, # UNDEFINED + 0x00e2: None, # UNDEFINED + 0x00e3: None, # UNDEFINED + 0x00e4: None, # UNDEFINED + 0x00e5: None, # UNDEFINED + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: None, # UNDEFINED + 0x00e8: None, # UNDEFINED + 0x00e9: None, # UNDEFINED + 0x00ea: None, # UNDEFINED + 0x00eb: None, # UNDEFINED + 0x00ec: None, # UNDEFINED + 0x00ed: None, # UNDEFINED + 0x00ee: 0x00af, # MACRON + 0x00ef: 0x00b4, # ACUTE ACCENT + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2017, # DOUBLE LOW LINE + 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00f4: 0x00b6, # PILCROW SIGN + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x00b8, # CEDILLA + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x00a8, # DIAERESIS + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x00b9, # SUPERSCRIPT ONE + 0x00fc: 0x00b3, # SUPERSCRIPT THREE + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Encoding Map diff --git a/Lib/encodings/cp857.py b/Lib/encodings/cp857.py index 5c84bb8..bc26241 100644 --- a/Lib/encodings/cp857.py +++ b/Lib/encodings/cp857.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,133 +37,133 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x009f: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE - 0x00a7: 0x011f, # LATIN SMALL LETTER G WITH BREVE - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00b8: 0x00a9, # COPYRIGHT SIGN - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x00a2, # CENT SIGN - 0x00be: 0x00a5, # YEN SIGN - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00d1: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00d5: None, # UNDEFINED - 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x00a6, # BROKEN BAR - 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: None, # UNDEFINED - 0x00e8: 0x00d7, # MULTIPLICATION SIGN - 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00ed: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x00ee: 0x00af, # MACRON - 0x00ef: 0x00b4, # ACUTE ACCENT - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: None, # UNDEFINED - 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x00b8, # CEDILLA - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x00b9, # SUPERSCRIPT ONE - 0x00fc: 0x00b3, # SUPERSCRIPT THREE - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x009f: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE + 0x00a7: 0x011f, # LATIN SMALL LETTER G WITH BREVE + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00b8: 0x00a9, # COPYRIGHT SIGN + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x00a2, # CENT SIGN + 0x00be: 0x00a5, # YEN SIGN + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x00a4, # CURRENCY SIGN + 0x00d0: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00d1: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00d5: None, # UNDEFINED + 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x00a6, # BROKEN BAR + 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: None, # UNDEFINED + 0x00e8: 0x00d7, # MULTIPLICATION SIGN + 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00ed: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x00ee: 0x00af, # MACRON + 0x00ef: 0x00b4, # ACUTE ACCENT + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: None, # UNDEFINED + 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00f4: 0x00b6, # PILCROW SIGN + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x00b8, # CEDILLA + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x00a8, # DIAERESIS + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x00b9, # SUPERSCRIPT ONE + 0x00fc: 0x00b3, # SUPERSCRIPT THREE + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Encoding Map diff --git a/Lib/encodings/cp860.py b/Lib/encodings/cp860.py index a721ea3..ae0fcd8 100644 --- a/Lib/encodings/cp860.py +++ b/Lib/encodings/cp860.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,134 +37,134 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x008c: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x008f: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x0092: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x0099: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00a2, # CENT SIGN - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x008c: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x008e: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x008f: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x0092: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x0099: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00a2, # CENT SIGN + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Encoding Map diff --git a/Lib/encodings/cp861.py b/Lib/encodings/cp861.py index ccf26b7..4d15b81 100644 --- a/Lib/encodings/cp861.py +++ b/Lib/encodings/cp861.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,134 +37,134 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00d0, # LATIN CAPITAL LETTER ETH - 0x008c: 0x00f0, # LATIN SMALL LETTER ETH - 0x008d: 0x00de, # LATIN CAPITAL LETTER THORN - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00fe, # LATIN SMALL LETTER THORN - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x0098: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00a5: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00a6: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00a7: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00d0, # LATIN CAPITAL LETTER ETH + 0x008c: 0x00f0, # LATIN SMALL LETTER ETH + 0x008d: 0x00de, # LATIN CAPITAL LETTER THORN + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00fe, # LATIN SMALL LETTER THORN + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x0098: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00a5: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00a6: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00a7: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Encoding Map diff --git a/Lib/encodings/cp862.py b/Lib/encodings/cp862.py index 8f6d111..f892002 100644 --- a/Lib/encodings/cp862.py +++ b/Lib/encodings/cp862.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,134 +37,134 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x05d0, # HEBREW LETTER ALEF - 0x0081: 0x05d1, # HEBREW LETTER BET - 0x0082: 0x05d2, # HEBREW LETTER GIMEL - 0x0083: 0x05d3, # HEBREW LETTER DALET - 0x0084: 0x05d4, # HEBREW LETTER HE - 0x0085: 0x05d5, # HEBREW LETTER VAV - 0x0086: 0x05d6, # HEBREW LETTER ZAYIN - 0x0087: 0x05d7, # HEBREW LETTER HET - 0x0088: 0x05d8, # HEBREW LETTER TET - 0x0089: 0x05d9, # HEBREW LETTER YOD - 0x008a: 0x05da, # HEBREW LETTER FINAL KAF - 0x008b: 0x05db, # HEBREW LETTER KAF - 0x008c: 0x05dc, # HEBREW LETTER LAMED - 0x008d: 0x05dd, # HEBREW LETTER FINAL MEM - 0x008e: 0x05de, # HEBREW LETTER MEM - 0x008f: 0x05df, # HEBREW LETTER FINAL NUN - 0x0090: 0x05e0, # HEBREW LETTER NUN - 0x0091: 0x05e1, # HEBREW LETTER SAMEKH - 0x0092: 0x05e2, # HEBREW LETTER AYIN - 0x0093: 0x05e3, # HEBREW LETTER FINAL PE - 0x0094: 0x05e4, # HEBREW LETTER PE - 0x0095: 0x05e5, # HEBREW LETTER FINAL TSADI - 0x0096: 0x05e6, # HEBREW LETTER TSADI - 0x0097: 0x05e7, # HEBREW LETTER QOF - 0x0098: 0x05e8, # HEBREW LETTER RESH - 0x0099: 0x05e9, # HEBREW LETTER SHIN - 0x009a: 0x05ea, # HEBREW LETTER TAV - 0x009b: 0x00a2, # CENT SIGN - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00a5, # YEN SIGN - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x05d0, # HEBREW LETTER ALEF + 0x0081: 0x05d1, # HEBREW LETTER BET + 0x0082: 0x05d2, # HEBREW LETTER GIMEL + 0x0083: 0x05d3, # HEBREW LETTER DALET + 0x0084: 0x05d4, # HEBREW LETTER HE + 0x0085: 0x05d5, # HEBREW LETTER VAV + 0x0086: 0x05d6, # HEBREW LETTER ZAYIN + 0x0087: 0x05d7, # HEBREW LETTER HET + 0x0088: 0x05d8, # HEBREW LETTER TET + 0x0089: 0x05d9, # HEBREW LETTER YOD + 0x008a: 0x05da, # HEBREW LETTER FINAL KAF + 0x008b: 0x05db, # HEBREW LETTER KAF + 0x008c: 0x05dc, # HEBREW LETTER LAMED + 0x008d: 0x05dd, # HEBREW LETTER FINAL MEM + 0x008e: 0x05de, # HEBREW LETTER MEM + 0x008f: 0x05df, # HEBREW LETTER FINAL NUN + 0x0090: 0x05e0, # HEBREW LETTER NUN + 0x0091: 0x05e1, # HEBREW LETTER SAMEKH + 0x0092: 0x05e2, # HEBREW LETTER AYIN + 0x0093: 0x05e3, # HEBREW LETTER FINAL PE + 0x0094: 0x05e4, # HEBREW LETTER PE + 0x0095: 0x05e5, # HEBREW LETTER FINAL TSADI + 0x0096: 0x05e6, # HEBREW LETTER TSADI + 0x0097: 0x05e7, # HEBREW LETTER QOF + 0x0098: 0x05e8, # HEBREW LETTER RESH + 0x0099: 0x05e9, # HEBREW LETTER SHIN + 0x009a: 0x05ea, # HEBREW LETTER TAV + 0x009b: 0x00a2, # CENT SIGN + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00a5, # YEN SIGN + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Encoding Map diff --git a/Lib/encodings/cp863.py b/Lib/encodings/cp863.py index 3596e64..5f823d1 100644 --- a/Lib/encodings/cp863.py +++ b/Lib/encodings/cp863.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,134 +37,134 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00b6, # PILCROW SIGN - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x2017, # DOUBLE LOW LINE - 0x008e: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x008f: 0x00a7, # SECTION SIGN - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x0092: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x0095: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00a4, # CURRENCY SIGN - 0x0099: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00a2, # CENT SIGN - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x009e: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00a6, # BROKEN BAR - 0x00a1: 0x00b4, # ACUTE ACCENT - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00a8, # DIAERESIS - 0x00a5: 0x00b8, # CEDILLA - 0x00a6: 0x00b3, # SUPERSCRIPT THREE - 0x00a7: 0x00af, # MACRON - 0x00a8: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00b6, # PILCROW SIGN + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x2017, # DOUBLE LOW LINE + 0x008e: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x008f: 0x00a7, # SECTION SIGN + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x0092: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x0095: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00a4, # CURRENCY SIGN + 0x0099: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00a2, # CENT SIGN + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x009e: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00a6, # BROKEN BAR + 0x00a1: 0x00b4, # ACUTE ACCENT + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00a8, # DIAERESIS + 0x00a5: 0x00b8, # CEDILLA + 0x00a6: 0x00b3, # SUPERSCRIPT THREE + 0x00a7: 0x00af, # MACRON + 0x00a8: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Encoding Map diff --git a/Lib/encodings/cp864.py b/Lib/encodings/cp864.py index f510a26..ac59217 100644 --- a/Lib/encodings/cp864.py +++ b/Lib/encodings/cp864.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,132 +37,132 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0025: 0x066a, # ARABIC PERCENT SIGN - 0x0080: 0x00b0, # DEGREE SIGN - 0x0081: 0x00b7, # MIDDLE DOT - 0x0082: 0x2219, # BULLET OPERATOR - 0x0083: 0x221a, # SQUARE ROOT - 0x0084: 0x2592, # MEDIUM SHADE - 0x0085: 0x2500, # FORMS LIGHT HORIZONTAL - 0x0086: 0x2502, # FORMS LIGHT VERTICAL - 0x0087: 0x253c, # FORMS LIGHT VERTICAL AND HORIZONTAL - 0x0088: 0x2524, # FORMS LIGHT VERTICAL AND LEFT - 0x0089: 0x252c, # FORMS LIGHT DOWN AND HORIZONTAL - 0x008a: 0x251c, # FORMS LIGHT VERTICAL AND RIGHT - 0x008b: 0x2534, # FORMS LIGHT UP AND HORIZONTAL - 0x008c: 0x2510, # FORMS LIGHT DOWN AND LEFT - 0x008d: 0x250c, # FORMS LIGHT DOWN AND RIGHT - 0x008e: 0x2514, # FORMS LIGHT UP AND RIGHT - 0x008f: 0x2518, # FORMS LIGHT UP AND LEFT - 0x0090: 0x03b2, # GREEK SMALL BETA - 0x0091: 0x221e, # INFINITY - 0x0092: 0x03c6, # GREEK SMALL PHI - 0x0093: 0x00b1, # PLUS-OR-MINUS SIGN - 0x0094: 0x00bd, # FRACTION 1/2 - 0x0095: 0x00bc, # FRACTION 1/4 - 0x0096: 0x2248, # ALMOST EQUAL TO - 0x0097: 0x00ab, # LEFT POINTING GUILLEMET - 0x0098: 0x00bb, # RIGHT POINTING GUILLEMET - 0x0099: 0xfef7, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM - 0x009a: 0xfef8, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM - 0x009b: None, # UNDEFINED - 0x009c: None, # UNDEFINED - 0x009d: 0xfefb, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM - 0x009e: 0xfefc, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM - 0x009f: None, # UNDEFINED - 0x00a1: 0x00ad, # SOFT HYPHEN - 0x00a2: 0xfe82, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM - 0x00a5: 0xfe84, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM - 0x00a6: None, # UNDEFINED - 0x00a7: None, # UNDEFINED - 0x00a8: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM - 0x00a9: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM - 0x00aa: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM - 0x00ab: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM - 0x00ac: 0x060c, # ARABIC COMMA - 0x00ad: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM - 0x00ae: 0xfea1, # ARABIC LETTER HAH ISOLATED FORM - 0x00af: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM - 0x00b0: 0x0660, # ARABIC-INDIC DIGIT ZERO - 0x00b1: 0x0661, # ARABIC-INDIC DIGIT ONE - 0x00b2: 0x0662, # ARABIC-INDIC DIGIT TWO - 0x00b3: 0x0663, # ARABIC-INDIC DIGIT THREE - 0x00b4: 0x0664, # ARABIC-INDIC DIGIT FOUR - 0x00b5: 0x0665, # ARABIC-INDIC DIGIT FIVE - 0x00b6: 0x0666, # ARABIC-INDIC DIGIT SIX - 0x00b7: 0x0667, # ARABIC-INDIC DIGIT SEVEN - 0x00b8: 0x0668, # ARABIC-INDIC DIGIT EIGHT - 0x00b9: 0x0669, # ARABIC-INDIC DIGIT NINE - 0x00ba: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM - 0x00bb: 0x061b, # ARABIC SEMICOLON - 0x00bc: 0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM - 0x00bd: 0xfeb5, # ARABIC LETTER SHEEN ISOLATED FORM - 0x00be: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM - 0x00bf: 0x061f, # ARABIC QUESTION MARK - 0x00c0: 0x00a2, # CENT SIGN - 0x00c1: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM - 0x00c2: 0xfe81, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM - 0x00c3: 0xfe83, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM - 0x00c4: 0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM - 0x00c5: 0xfeca, # ARABIC LETTER AIN FINAL FORM - 0x00c6: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM - 0x00c7: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM - 0x00c8: 0xfe91, # ARABIC LETTER BEH INITIAL FORM - 0x00c9: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED FORM - 0x00ca: 0xfe97, # ARABIC LETTER TEH INITIAL FORM - 0x00cb: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM - 0x00cc: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM - 0x00cd: 0xfea3, # ARABIC LETTER HAH INITIAL FORM - 0x00ce: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM - 0x00cf: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM - 0x00d0: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM - 0x00d1: 0xfead, # ARABIC LETTER REH ISOLATED FORM - 0x00d2: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM - 0x00d3: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM - 0x00d4: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM - 0x00d5: 0xfebb, # ARABIC LETTER SAD INITIAL FORM - 0x00d6: 0xfebf, # ARABIC LETTER DAD INITIAL FORM - 0x00d7: 0xfec1, # ARABIC LETTER TAH ISOLATED FORM - 0x00d8: 0xfec5, # ARABIC LETTER ZAH ISOLATED FORM - 0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM - 0x00da: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM - 0x00db: 0x00a6, # BROKEN VERTICAL BAR - 0x00dc: 0x00ac, # NOT SIGN - 0x00dd: 0x00f7, # DIVISION SIGN - 0x00de: 0x00d7, # MULTIPLICATION SIGN - 0x00df: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM - 0x00e0: 0x0640, # ARABIC TATWEEL - 0x00e1: 0xfed3, # ARABIC LETTER FEH INITIAL FORM - 0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM - 0x00e3: 0xfedb, # ARABIC LETTER KAF INITIAL FORM - 0x00e4: 0xfedf, # ARABIC LETTER LAM INITIAL FORM - 0x00e5: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM - 0x00e6: 0xfee7, # ARABIC LETTER NOON INITIAL FORM - 0x00e7: 0xfeeb, # ARABIC LETTER HEH INITIAL FORM - 0x00e8: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM - 0x00e9: 0xfeef, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM - 0x00ea: 0xfef3, # ARABIC LETTER YEH INITIAL FORM - 0x00eb: 0xfebd, # ARABIC LETTER DAD ISOLATED FORM - 0x00ec: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM - 0x00ed: 0xfece, # ARABIC LETTER GHAIN FINAL FORM - 0x00ee: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM - 0x00ef: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM - 0x00f0: 0xfe7d, # ARABIC SHADDA MEDIAL FORM - 0x00f1: 0x0651, # ARABIC SHADDAH - 0x00f2: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM - 0x00f3: 0xfee9, # ARABIC LETTER HEH ISOLATED FORM - 0x00f4: 0xfeec, # ARABIC LETTER HEH MEDIAL FORM - 0x00f5: 0xfef0, # ARABIC LETTER ALEF MAKSURA FINAL FORM - 0x00f6: 0xfef2, # ARABIC LETTER YEH FINAL FORM - 0x00f7: 0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM - 0x00f8: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM - 0x00f9: 0xfef5, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM - 0x00fa: 0xfef6, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM - 0x00fb: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM - 0x00fc: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM - 0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: None, # UNDEFINED + 0x0025: 0x066a, # ARABIC PERCENT SIGN + 0x0080: 0x00b0, # DEGREE SIGN + 0x0081: 0x00b7, # MIDDLE DOT + 0x0082: 0x2219, # BULLET OPERATOR + 0x0083: 0x221a, # SQUARE ROOT + 0x0084: 0x2592, # MEDIUM SHADE + 0x0085: 0x2500, # FORMS LIGHT HORIZONTAL + 0x0086: 0x2502, # FORMS LIGHT VERTICAL + 0x0087: 0x253c, # FORMS LIGHT VERTICAL AND HORIZONTAL + 0x0088: 0x2524, # FORMS LIGHT VERTICAL AND LEFT + 0x0089: 0x252c, # FORMS LIGHT DOWN AND HORIZONTAL + 0x008a: 0x251c, # FORMS LIGHT VERTICAL AND RIGHT + 0x008b: 0x2534, # FORMS LIGHT UP AND HORIZONTAL + 0x008c: 0x2510, # FORMS LIGHT DOWN AND LEFT + 0x008d: 0x250c, # FORMS LIGHT DOWN AND RIGHT + 0x008e: 0x2514, # FORMS LIGHT UP AND RIGHT + 0x008f: 0x2518, # FORMS LIGHT UP AND LEFT + 0x0090: 0x03b2, # GREEK SMALL BETA + 0x0091: 0x221e, # INFINITY + 0x0092: 0x03c6, # GREEK SMALL PHI + 0x0093: 0x00b1, # PLUS-OR-MINUS SIGN + 0x0094: 0x00bd, # FRACTION 1/2 + 0x0095: 0x00bc, # FRACTION 1/4 + 0x0096: 0x2248, # ALMOST EQUAL TO + 0x0097: 0x00ab, # LEFT POINTING GUILLEMET + 0x0098: 0x00bb, # RIGHT POINTING GUILLEMET + 0x0099: 0xfef7, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM + 0x009a: 0xfef8, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM + 0x009b: None, # UNDEFINED + 0x009c: None, # UNDEFINED + 0x009d: 0xfefb, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM + 0x009e: 0xfefc, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM + 0x009f: None, # UNDEFINED + 0x00a1: 0x00ad, # SOFT HYPHEN + 0x00a2: 0xfe82, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM + 0x00a5: 0xfe84, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM + 0x00a6: None, # UNDEFINED + 0x00a7: None, # UNDEFINED + 0x00a8: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM + 0x00a9: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM + 0x00aa: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM + 0x00ab: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM + 0x00ac: 0x060c, # ARABIC COMMA + 0x00ad: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM + 0x00ae: 0xfea1, # ARABIC LETTER HAH ISOLATED FORM + 0x00af: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM + 0x00b0: 0x0660, # ARABIC-INDIC DIGIT ZERO + 0x00b1: 0x0661, # ARABIC-INDIC DIGIT ONE + 0x00b2: 0x0662, # ARABIC-INDIC DIGIT TWO + 0x00b3: 0x0663, # ARABIC-INDIC DIGIT THREE + 0x00b4: 0x0664, # ARABIC-INDIC DIGIT FOUR + 0x00b5: 0x0665, # ARABIC-INDIC DIGIT FIVE + 0x00b6: 0x0666, # ARABIC-INDIC DIGIT SIX + 0x00b7: 0x0667, # ARABIC-INDIC DIGIT SEVEN + 0x00b8: 0x0668, # ARABIC-INDIC DIGIT EIGHT + 0x00b9: 0x0669, # ARABIC-INDIC DIGIT NINE + 0x00ba: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM + 0x00bb: 0x061b, # ARABIC SEMICOLON + 0x00bc: 0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM + 0x00bd: 0xfeb5, # ARABIC LETTER SHEEN ISOLATED FORM + 0x00be: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM + 0x00bf: 0x061f, # ARABIC QUESTION MARK + 0x00c0: 0x00a2, # CENT SIGN + 0x00c1: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM + 0x00c2: 0xfe81, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + 0x00c3: 0xfe83, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM + 0x00c4: 0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + 0x00c5: 0xfeca, # ARABIC LETTER AIN FINAL FORM + 0x00c6: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + 0x00c7: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM + 0x00c8: 0xfe91, # ARABIC LETTER BEH INITIAL FORM + 0x00c9: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED FORM + 0x00ca: 0xfe97, # ARABIC LETTER TEH INITIAL FORM + 0x00cb: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM + 0x00cc: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM + 0x00cd: 0xfea3, # ARABIC LETTER HAH INITIAL FORM + 0x00ce: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM + 0x00cf: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM + 0x00d0: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM + 0x00d1: 0xfead, # ARABIC LETTER REH ISOLATED FORM + 0x00d2: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM + 0x00d3: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM + 0x00d4: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM + 0x00d5: 0xfebb, # ARABIC LETTER SAD INITIAL FORM + 0x00d6: 0xfebf, # ARABIC LETTER DAD INITIAL FORM + 0x00d7: 0xfec1, # ARABIC LETTER TAH ISOLATED FORM + 0x00d8: 0xfec5, # ARABIC LETTER ZAH ISOLATED FORM + 0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM + 0x00da: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM + 0x00db: 0x00a6, # BROKEN VERTICAL BAR + 0x00dc: 0x00ac, # NOT SIGN + 0x00dd: 0x00f7, # DIVISION SIGN + 0x00de: 0x00d7, # MULTIPLICATION SIGN + 0x00df: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM + 0x00e0: 0x0640, # ARABIC TATWEEL + 0x00e1: 0xfed3, # ARABIC LETTER FEH INITIAL FORM + 0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM + 0x00e3: 0xfedb, # ARABIC LETTER KAF INITIAL FORM + 0x00e4: 0xfedf, # ARABIC LETTER LAM INITIAL FORM + 0x00e5: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM + 0x00e6: 0xfee7, # ARABIC LETTER NOON INITIAL FORM + 0x00e7: 0xfeeb, # ARABIC LETTER HEH INITIAL FORM + 0x00e8: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM + 0x00e9: 0xfeef, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM + 0x00ea: 0xfef3, # ARABIC LETTER YEH INITIAL FORM + 0x00eb: 0xfebd, # ARABIC LETTER DAD ISOLATED FORM + 0x00ec: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM + 0x00ed: 0xfece, # ARABIC LETTER GHAIN FINAL FORM + 0x00ee: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM + 0x00ef: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM + 0x00f0: 0xfe7d, # ARABIC SHADDA MEDIAL FORM + 0x00f1: 0x0651, # ARABIC SHADDAH + 0x00f2: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM + 0x00f3: 0xfee9, # ARABIC LETTER HEH ISOLATED FORM + 0x00f4: 0xfeec, # ARABIC LETTER HEH MEDIAL FORM + 0x00f5: 0xfef0, # ARABIC LETTER ALEF MAKSURA FINAL FORM + 0x00f6: 0xfef2, # ARABIC LETTER YEH FINAL FORM + 0x00f7: 0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM + 0x00f8: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM + 0x00f9: 0xfef5, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM + 0x00fa: 0xfef6, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM + 0x00fb: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM + 0x00fc: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM + 0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: None, # UNDEFINED }) ### Encoding Map diff --git a/Lib/encodings/cp865.py b/Lib/encodings/cp865.py index cc4f719..b4f88f6 100644 --- a/Lib/encodings/cp865.py +++ b/Lib/encodings/cp865.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,134 +37,134 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00a4, # CURRENCY SIGN - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00a4, # CURRENCY SIGN + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Encoding Map diff --git a/Lib/encodings/cp866.py b/Lib/encodings/cp866.py index 518eede..a6a26e5 100644 --- a/Lib/encodings/cp866.py +++ b/Lib/encodings/cp866.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,134 +37,134 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES - 0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A - 0x00a1: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00a2: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00a3: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00a4: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00a5: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00a6: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00a7: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00a8: 0x0438, # CYRILLIC SMALL LETTER I - 0x00a9: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00aa: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00ab: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00ac: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00ad: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00ae: 0x043e, # CYRILLIC SMALL LETTER O - 0x00af: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00e3: 0x0443, # CYRILLIC SMALL LETTER U - 0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF - 0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00ed: 0x044d, # CYRILLIC SMALL LETTER E - 0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU - 0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA - 0x00f0: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO - 0x00f2: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x00f3: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x00f4: 0x0407, # CYRILLIC CAPITAL LETTER YI - 0x00f5: 0x0457, # CYRILLIC SMALL LETTER YI - 0x00f6: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U - 0x00f7: 0x045e, # CYRILLIC SMALL LETTER SHORT U - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x2116, # NUMERO SIGN - 0x00fd: 0x00a4, # CURRENCY SIGN - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A + 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE + 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE + 0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE + 0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE + 0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE + 0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE + 0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE + 0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I + 0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I + 0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA + 0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL + 0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM + 0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN + 0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O + 0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE + 0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER + 0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES + 0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE + 0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U + 0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF + 0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA + 0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE + 0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE + 0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA + 0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA + 0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU + 0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E + 0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU + 0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA + 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A + 0x00a1: 0x0431, # CYRILLIC SMALL LETTER BE + 0x00a2: 0x0432, # CYRILLIC SMALL LETTER VE + 0x00a3: 0x0433, # CYRILLIC SMALL LETTER GHE + 0x00a4: 0x0434, # CYRILLIC SMALL LETTER DE + 0x00a5: 0x0435, # CYRILLIC SMALL LETTER IE + 0x00a6: 0x0436, # CYRILLIC SMALL LETTER ZHE + 0x00a7: 0x0437, # CYRILLIC SMALL LETTER ZE + 0x00a8: 0x0438, # CYRILLIC SMALL LETTER I + 0x00a9: 0x0439, # CYRILLIC SMALL LETTER SHORT I + 0x00aa: 0x043a, # CYRILLIC SMALL LETTER KA + 0x00ab: 0x043b, # CYRILLIC SMALL LETTER EL + 0x00ac: 0x043c, # CYRILLIC SMALL LETTER EM + 0x00ad: 0x043d, # CYRILLIC SMALL LETTER EN + 0x00ae: 0x043e, # CYRILLIC SMALL LETTER O + 0x00af: 0x043f, # CYRILLIC SMALL LETTER PE + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER + 0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES + 0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE + 0x00e3: 0x0443, # CYRILLIC SMALL LETTER U + 0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF + 0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA + 0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE + 0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE + 0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA + 0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA + 0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN + 0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU + 0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN + 0x00ed: 0x044d, # CYRILLIC SMALL LETTER E + 0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU + 0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA + 0x00f0: 0x0401, # CYRILLIC CAPITAL LETTER IO + 0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO + 0x00f2: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x00f3: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x00f4: 0x0407, # CYRILLIC CAPITAL LETTER YI + 0x00f5: 0x0457, # CYRILLIC SMALL LETTER YI + 0x00f6: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U + 0x00f7: 0x045e, # CYRILLIC SMALL LETTER SHORT U + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x2116, # NUMERO SIGN + 0x00fd: 0x00a4, # CURRENCY SIGN + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Encoding Map diff --git a/Lib/encodings/cp869.py b/Lib/encodings/cp869.py index 2e3ad35..21b0ab8 100644 --- a/Lib/encodings/cp869.py +++ b/Lib/encodings/cp869.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,134 +37,134 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: None, # UNDEFINED - 0x0081: None, # UNDEFINED - 0x0082: None, # UNDEFINED - 0x0083: None, # UNDEFINED - 0x0084: None, # UNDEFINED - 0x0085: None, # UNDEFINED - 0x0086: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0087: None, # UNDEFINED - 0x0088: 0x00b7, # MIDDLE DOT - 0x0089: 0x00ac, # NOT SIGN - 0x008a: 0x00a6, # BROKEN BAR - 0x008b: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x008c: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x008d: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x008e: 0x2015, # HORIZONTAL BAR - 0x008f: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x0090: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x0091: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x0092: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x0093: None, # UNDEFINED - 0x0094: None, # UNDEFINED - 0x0095: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x0096: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x0097: 0x00a9, # COPYRIGHT SIGN - 0x0098: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0099: 0x00b2, # SUPERSCRIPT TWO - 0x009a: 0x00b3, # SUPERSCRIPT THREE - 0x009b: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x009e: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS - 0x009f: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS - 0x00a0: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x00a1: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x00a2: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x00a3: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x00a4: 0x0391, # GREEK CAPITAL LETTER ALPHA - 0x00a5: 0x0392, # GREEK CAPITAL LETTER BETA - 0x00a6: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00a7: 0x0394, # GREEK CAPITAL LETTER DELTA - 0x00a8: 0x0395, # GREEK CAPITAL LETTER EPSILON - 0x00a9: 0x0396, # GREEK CAPITAL LETTER ZETA - 0x00aa: 0x0397, # GREEK CAPITAL LETTER ETA - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ad: 0x0399, # GREEK CAPITAL LETTER IOTA - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x039a, # GREEK CAPITAL LETTER KAPPA - 0x00b6: 0x039b, # GREEK CAPITAL LETTER LAMDA - 0x00b7: 0x039c, # GREEK CAPITAL LETTER MU - 0x00b8: 0x039d, # GREEK CAPITAL LETTER NU - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x039e, # GREEK CAPITAL LETTER XI - 0x00be: 0x039f, # GREEK CAPITAL LETTER OMICRON - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x03a0, # GREEK CAPITAL LETTER PI - 0x00c7: 0x03a1, # GREEK CAPITAL LETTER RHO - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00d0: 0x03a4, # GREEK CAPITAL LETTER TAU - 0x00d1: 0x03a5, # GREEK CAPITAL LETTER UPSILON - 0x00d2: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00d3: 0x03a7, # GREEK CAPITAL LETTER CHI - 0x00d4: 0x03a8, # GREEK CAPITAL LETTER PSI - 0x00d5: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00d6: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00d7: 0x03b2, # GREEK SMALL LETTER BETA - 0x00d8: 0x03b3, # GREEK SMALL LETTER GAMMA - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00de: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b6, # GREEK SMALL LETTER ZETA - 0x00e1: 0x03b7, # GREEK SMALL LETTER ETA - 0x00e2: 0x03b8, # GREEK SMALL LETTER THETA - 0x00e3: 0x03b9, # GREEK SMALL LETTER IOTA - 0x00e4: 0x03ba, # GREEK SMALL LETTER KAPPA - 0x00e5: 0x03bb, # GREEK SMALL LETTER LAMDA - 0x00e6: 0x03bc, # GREEK SMALL LETTER MU - 0x00e7: 0x03bd, # GREEK SMALL LETTER NU - 0x00e8: 0x03be, # GREEK SMALL LETTER XI - 0x00e9: 0x03bf, # GREEK SMALL LETTER OMICRON - 0x00ea: 0x03c0, # GREEK SMALL LETTER PI - 0x00eb: 0x03c1, # GREEK SMALL LETTER RHO - 0x00ec: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00ed: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA - 0x00ee: 0x03c4, # GREEK SMALL LETTER TAU - 0x00ef: 0x0384, # GREEK TONOS - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x03c5, # GREEK SMALL LETTER UPSILON - 0x00f3: 0x03c6, # GREEK SMALL LETTER PHI - 0x00f4: 0x03c7, # GREEK SMALL LETTER CHI - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x03c8, # GREEK SMALL LETTER PSI - 0x00f7: 0x0385, # GREEK DIALYTIKA TONOS - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x03c9, # GREEK SMALL LETTER OMEGA - 0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x00fc: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: None, # UNDEFINED + 0x0081: None, # UNDEFINED + 0x0082: None, # UNDEFINED + 0x0083: None, # UNDEFINED + 0x0084: None, # UNDEFINED + 0x0085: None, # UNDEFINED + 0x0086: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0087: None, # UNDEFINED + 0x0088: 0x00b7, # MIDDLE DOT + 0x0089: 0x00ac, # NOT SIGN + 0x008a: 0x00a6, # BROKEN BAR + 0x008b: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x008c: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x008d: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x008e: 0x2015, # HORIZONTAL BAR + 0x008f: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x0090: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x0091: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x0092: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x0093: None, # UNDEFINED + 0x0094: None, # UNDEFINED + 0x0095: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x0096: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x0097: 0x00a9, # COPYRIGHT SIGN + 0x0098: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0099: 0x00b2, # SUPERSCRIPT TWO + 0x009a: 0x00b3, # SUPERSCRIPT THREE + 0x009b: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x009e: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS + 0x009f: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS + 0x00a0: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x00a1: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x00a2: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x00a3: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x00a4: 0x0391, # GREEK CAPITAL LETTER ALPHA + 0x00a5: 0x0392, # GREEK CAPITAL LETTER BETA + 0x00a6: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00a7: 0x0394, # GREEK CAPITAL LETTER DELTA + 0x00a8: 0x0395, # GREEK CAPITAL LETTER EPSILON + 0x00a9: 0x0396, # GREEK CAPITAL LETTER ZETA + 0x00aa: 0x0397, # GREEK CAPITAL LETTER ETA + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ad: 0x0399, # GREEK CAPITAL LETTER IOTA + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x039a, # GREEK CAPITAL LETTER KAPPA + 0x00b6: 0x039b, # GREEK CAPITAL LETTER LAMDA + 0x00b7: 0x039c, # GREEK CAPITAL LETTER MU + 0x00b8: 0x039d, # GREEK CAPITAL LETTER NU + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x039e, # GREEK CAPITAL LETTER XI + 0x00be: 0x039f, # GREEK CAPITAL LETTER OMICRON + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x03a0, # GREEK CAPITAL LETTER PI + 0x00c7: 0x03a1, # GREEK CAPITAL LETTER RHO + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00d0: 0x03a4, # GREEK CAPITAL LETTER TAU + 0x00d1: 0x03a5, # GREEK CAPITAL LETTER UPSILON + 0x00d2: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00d3: 0x03a7, # GREEK CAPITAL LETTER CHI + 0x00d4: 0x03a8, # GREEK CAPITAL LETTER PSI + 0x00d5: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00d6: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00d7: 0x03b2, # GREEK SMALL LETTER BETA + 0x00d8: 0x03b3, # GREEK SMALL LETTER GAMMA + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00de: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b6, # GREEK SMALL LETTER ZETA + 0x00e1: 0x03b7, # GREEK SMALL LETTER ETA + 0x00e2: 0x03b8, # GREEK SMALL LETTER THETA + 0x00e3: 0x03b9, # GREEK SMALL LETTER IOTA + 0x00e4: 0x03ba, # GREEK SMALL LETTER KAPPA + 0x00e5: 0x03bb, # GREEK SMALL LETTER LAMDA + 0x00e6: 0x03bc, # GREEK SMALL LETTER MU + 0x00e7: 0x03bd, # GREEK SMALL LETTER NU + 0x00e8: 0x03be, # GREEK SMALL LETTER XI + 0x00e9: 0x03bf, # GREEK SMALL LETTER OMICRON + 0x00ea: 0x03c0, # GREEK SMALL LETTER PI + 0x00eb: 0x03c1, # GREEK SMALL LETTER RHO + 0x00ec: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00ed: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA + 0x00ee: 0x03c4, # GREEK SMALL LETTER TAU + 0x00ef: 0x0384, # GREEK TONOS + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x03c5, # GREEK SMALL LETTER UPSILON + 0x00f3: 0x03c6, # GREEK SMALL LETTER PHI + 0x00f4: 0x03c7, # GREEK SMALL LETTER CHI + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x03c8, # GREEK SMALL LETTER PSI + 0x00f7: 0x0385, # GREEK DIALYTIKA TONOS + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x00a8, # DIAERESIS + 0x00fa: 0x03c9, # GREEK SMALL LETTER OMEGA + 0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x00fc: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Encoding Map diff --git a/Lib/encodings/cp874.py b/Lib/encodings/cp874.py index c43e20b..01c5eb6 100644 --- a/Lib/encodings/cp874.py +++ b/Lib/encodings/cp874.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,133 +37,133 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: None, # UNDEFINED - 0x0082: None, # UNDEFINED - 0x0083: None, # UNDEFINED - 0x0084: None, # UNDEFINED - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: None, # UNDEFINED - 0x0087: None, # UNDEFINED - 0x0088: None, # UNDEFINED - 0x0089: None, # UNDEFINED - 0x008a: None, # UNDEFINED - 0x008b: None, # UNDEFINED - 0x008c: None, # UNDEFINED - 0x008d: None, # UNDEFINED - 0x008e: None, # UNDEFINED - 0x008f: None, # UNDEFINED - 0x0090: None, # UNDEFINED - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: None, # UNDEFINED - 0x0099: None, # UNDEFINED - 0x009a: None, # UNDEFINED - 0x009b: None, # UNDEFINED - 0x009c: None, # UNDEFINED - 0x009d: None, # UNDEFINED - 0x009e: None, # UNDEFINED - 0x009f: None, # UNDEFINED - 0x00a1: 0x0e01, # THAI CHARACTER KO KAI - 0x00a2: 0x0e02, # THAI CHARACTER KHO KHAI - 0x00a3: 0x0e03, # THAI CHARACTER KHO KHUAT - 0x00a4: 0x0e04, # THAI CHARACTER KHO KHWAI - 0x00a5: 0x0e05, # THAI CHARACTER KHO KHON - 0x00a6: 0x0e06, # THAI CHARACTER KHO RAKHANG - 0x00a7: 0x0e07, # THAI CHARACTER NGO NGU - 0x00a8: 0x0e08, # THAI CHARACTER CHO CHAN - 0x00a9: 0x0e09, # THAI CHARACTER CHO CHING - 0x00aa: 0x0e0a, # THAI CHARACTER CHO CHANG - 0x00ab: 0x0e0b, # THAI CHARACTER SO SO - 0x00ac: 0x0e0c, # THAI CHARACTER CHO CHOE - 0x00ad: 0x0e0d, # THAI CHARACTER YO YING - 0x00ae: 0x0e0e, # THAI CHARACTER DO CHADA - 0x00af: 0x0e0f, # THAI CHARACTER TO PATAK - 0x00b0: 0x0e10, # THAI CHARACTER THO THAN - 0x00b1: 0x0e11, # THAI CHARACTER THO NANGMONTHO - 0x00b2: 0x0e12, # THAI CHARACTER THO PHUTHAO - 0x00b3: 0x0e13, # THAI CHARACTER NO NEN - 0x00b4: 0x0e14, # THAI CHARACTER DO DEK - 0x00b5: 0x0e15, # THAI CHARACTER TO TAO - 0x00b6: 0x0e16, # THAI CHARACTER THO THUNG - 0x00b7: 0x0e17, # THAI CHARACTER THO THAHAN - 0x00b8: 0x0e18, # THAI CHARACTER THO THONG - 0x00b9: 0x0e19, # THAI CHARACTER NO NU - 0x00ba: 0x0e1a, # THAI CHARACTER BO BAIMAI - 0x00bb: 0x0e1b, # THAI CHARACTER PO PLA - 0x00bc: 0x0e1c, # THAI CHARACTER PHO PHUNG - 0x00bd: 0x0e1d, # THAI CHARACTER FO FA - 0x00be: 0x0e1e, # THAI CHARACTER PHO PHAN - 0x00bf: 0x0e1f, # THAI CHARACTER FO FAN - 0x00c0: 0x0e20, # THAI CHARACTER PHO SAMPHAO - 0x00c1: 0x0e21, # THAI CHARACTER MO MA - 0x00c2: 0x0e22, # THAI CHARACTER YO YAK - 0x00c3: 0x0e23, # THAI CHARACTER RO RUA - 0x00c4: 0x0e24, # THAI CHARACTER RU - 0x00c5: 0x0e25, # THAI CHARACTER LO LING - 0x00c6: 0x0e26, # THAI CHARACTER LU - 0x00c7: 0x0e27, # THAI CHARACTER WO WAEN - 0x00c8: 0x0e28, # THAI CHARACTER SO SALA - 0x00c9: 0x0e29, # THAI CHARACTER SO RUSI - 0x00ca: 0x0e2a, # THAI CHARACTER SO SUA - 0x00cb: 0x0e2b, # THAI CHARACTER HO HIP - 0x00cc: 0x0e2c, # THAI CHARACTER LO CHULA - 0x00cd: 0x0e2d, # THAI CHARACTER O ANG - 0x00ce: 0x0e2e, # THAI CHARACTER HO NOKHUK - 0x00cf: 0x0e2f, # THAI CHARACTER PAIYANNOI - 0x00d0: 0x0e30, # THAI CHARACTER SARA A - 0x00d1: 0x0e31, # THAI CHARACTER MAI HAN-AKAT - 0x00d2: 0x0e32, # THAI CHARACTER SARA AA - 0x00d3: 0x0e33, # THAI CHARACTER SARA AM - 0x00d4: 0x0e34, # THAI CHARACTER SARA I - 0x00d5: 0x0e35, # THAI CHARACTER SARA II - 0x00d6: 0x0e36, # THAI CHARACTER SARA UE - 0x00d7: 0x0e37, # THAI CHARACTER SARA UEE - 0x00d8: 0x0e38, # THAI CHARACTER SARA U - 0x00d9: 0x0e39, # THAI CHARACTER SARA UU - 0x00da: 0x0e3a, # THAI CHARACTER PHINTHU - 0x00db: None, # UNDEFINED - 0x00dc: None, # UNDEFINED - 0x00dd: None, # UNDEFINED - 0x00de: None, # UNDEFINED - 0x00df: 0x0e3f, # THAI CURRENCY SYMBOL BAHT - 0x00e0: 0x0e40, # THAI CHARACTER SARA E - 0x00e1: 0x0e41, # THAI CHARACTER SARA AE - 0x00e2: 0x0e42, # THAI CHARACTER SARA O - 0x00e3: 0x0e43, # THAI CHARACTER SARA AI MAIMUAN - 0x00e4: 0x0e44, # THAI CHARACTER SARA AI MAIMALAI - 0x00e5: 0x0e45, # THAI CHARACTER LAKKHANGYAO - 0x00e6: 0x0e46, # THAI CHARACTER MAIYAMOK - 0x00e7: 0x0e47, # THAI CHARACTER MAITAIKHU - 0x00e8: 0x0e48, # THAI CHARACTER MAI EK - 0x00e9: 0x0e49, # THAI CHARACTER MAI THO - 0x00ea: 0x0e4a, # THAI CHARACTER MAI TRI - 0x00eb: 0x0e4b, # THAI CHARACTER MAI CHATTAWA - 0x00ec: 0x0e4c, # THAI CHARACTER THANTHAKHAT - 0x00ed: 0x0e4d, # THAI CHARACTER NIKHAHIT - 0x00ee: 0x0e4e, # THAI CHARACTER YAMAKKAN - 0x00ef: 0x0e4f, # THAI CHARACTER FONGMAN - 0x00f0: 0x0e50, # THAI DIGIT ZERO - 0x00f1: 0x0e51, # THAI DIGIT ONE - 0x00f2: 0x0e52, # THAI DIGIT TWO - 0x00f3: 0x0e53, # THAI DIGIT THREE - 0x00f4: 0x0e54, # THAI DIGIT FOUR - 0x00f5: 0x0e55, # THAI DIGIT FIVE - 0x00f6: 0x0e56, # THAI DIGIT SIX - 0x00f7: 0x0e57, # THAI DIGIT SEVEN - 0x00f8: 0x0e58, # THAI DIGIT EIGHT - 0x00f9: 0x0e59, # THAI DIGIT NINE - 0x00fa: 0x0e5a, # THAI CHARACTER ANGKHANKHU - 0x00fb: 0x0e5b, # THAI CHARACTER KHOMUT - 0x00fc: None, # UNDEFINED - 0x00fd: None, # UNDEFINED - 0x00fe: None, # UNDEFINED - 0x00ff: None, # UNDEFINED + 0x0080: 0x20ac, # EURO SIGN + 0x0081: None, # UNDEFINED + 0x0082: None, # UNDEFINED + 0x0083: None, # UNDEFINED + 0x0084: None, # UNDEFINED + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: None, # UNDEFINED + 0x0087: None, # UNDEFINED + 0x0088: None, # UNDEFINED + 0x0089: None, # UNDEFINED + 0x008a: None, # UNDEFINED + 0x008b: None, # UNDEFINED + 0x008c: None, # UNDEFINED + 0x008d: None, # UNDEFINED + 0x008e: None, # UNDEFINED + 0x008f: None, # UNDEFINED + 0x0090: None, # UNDEFINED + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: None, # UNDEFINED + 0x0099: None, # UNDEFINED + 0x009a: None, # UNDEFINED + 0x009b: None, # UNDEFINED + 0x009c: None, # UNDEFINED + 0x009d: None, # UNDEFINED + 0x009e: None, # UNDEFINED + 0x009f: None, # UNDEFINED + 0x00a1: 0x0e01, # THAI CHARACTER KO KAI + 0x00a2: 0x0e02, # THAI CHARACTER KHO KHAI + 0x00a3: 0x0e03, # THAI CHARACTER KHO KHUAT + 0x00a4: 0x0e04, # THAI CHARACTER KHO KHWAI + 0x00a5: 0x0e05, # THAI CHARACTER KHO KHON + 0x00a6: 0x0e06, # THAI CHARACTER KHO RAKHANG + 0x00a7: 0x0e07, # THAI CHARACTER NGO NGU + 0x00a8: 0x0e08, # THAI CHARACTER CHO CHAN + 0x00a9: 0x0e09, # THAI CHARACTER CHO CHING + 0x00aa: 0x0e0a, # THAI CHARACTER CHO CHANG + 0x00ab: 0x0e0b, # THAI CHARACTER SO SO + 0x00ac: 0x0e0c, # THAI CHARACTER CHO CHOE + 0x00ad: 0x0e0d, # THAI CHARACTER YO YING + 0x00ae: 0x0e0e, # THAI CHARACTER DO CHADA + 0x00af: 0x0e0f, # THAI CHARACTER TO PATAK + 0x00b0: 0x0e10, # THAI CHARACTER THO THAN + 0x00b1: 0x0e11, # THAI CHARACTER THO NANGMONTHO + 0x00b2: 0x0e12, # THAI CHARACTER THO PHUTHAO + 0x00b3: 0x0e13, # THAI CHARACTER NO NEN + 0x00b4: 0x0e14, # THAI CHARACTER DO DEK + 0x00b5: 0x0e15, # THAI CHARACTER TO TAO + 0x00b6: 0x0e16, # THAI CHARACTER THO THUNG + 0x00b7: 0x0e17, # THAI CHARACTER THO THAHAN + 0x00b8: 0x0e18, # THAI CHARACTER THO THONG + 0x00b9: 0x0e19, # THAI CHARACTER NO NU + 0x00ba: 0x0e1a, # THAI CHARACTER BO BAIMAI + 0x00bb: 0x0e1b, # THAI CHARACTER PO PLA + 0x00bc: 0x0e1c, # THAI CHARACTER PHO PHUNG + 0x00bd: 0x0e1d, # THAI CHARACTER FO FA + 0x00be: 0x0e1e, # THAI CHARACTER PHO PHAN + 0x00bf: 0x0e1f, # THAI CHARACTER FO FAN + 0x00c0: 0x0e20, # THAI CHARACTER PHO SAMPHAO + 0x00c1: 0x0e21, # THAI CHARACTER MO MA + 0x00c2: 0x0e22, # THAI CHARACTER YO YAK + 0x00c3: 0x0e23, # THAI CHARACTER RO RUA + 0x00c4: 0x0e24, # THAI CHARACTER RU + 0x00c5: 0x0e25, # THAI CHARACTER LO LING + 0x00c6: 0x0e26, # THAI CHARACTER LU + 0x00c7: 0x0e27, # THAI CHARACTER WO WAEN + 0x00c8: 0x0e28, # THAI CHARACTER SO SALA + 0x00c9: 0x0e29, # THAI CHARACTER SO RUSI + 0x00ca: 0x0e2a, # THAI CHARACTER SO SUA + 0x00cb: 0x0e2b, # THAI CHARACTER HO HIP + 0x00cc: 0x0e2c, # THAI CHARACTER LO CHULA + 0x00cd: 0x0e2d, # THAI CHARACTER O ANG + 0x00ce: 0x0e2e, # THAI CHARACTER HO NOKHUK + 0x00cf: 0x0e2f, # THAI CHARACTER PAIYANNOI + 0x00d0: 0x0e30, # THAI CHARACTER SARA A + 0x00d1: 0x0e31, # THAI CHARACTER MAI HAN-AKAT + 0x00d2: 0x0e32, # THAI CHARACTER SARA AA + 0x00d3: 0x0e33, # THAI CHARACTER SARA AM + 0x00d4: 0x0e34, # THAI CHARACTER SARA I + 0x00d5: 0x0e35, # THAI CHARACTER SARA II + 0x00d6: 0x0e36, # THAI CHARACTER SARA UE + 0x00d7: 0x0e37, # THAI CHARACTER SARA UEE + 0x00d8: 0x0e38, # THAI CHARACTER SARA U + 0x00d9: 0x0e39, # THAI CHARACTER SARA UU + 0x00da: 0x0e3a, # THAI CHARACTER PHINTHU + 0x00db: None, # UNDEFINED + 0x00dc: None, # UNDEFINED + 0x00dd: None, # UNDEFINED + 0x00de: None, # UNDEFINED + 0x00df: 0x0e3f, # THAI CURRENCY SYMBOL BAHT + 0x00e0: 0x0e40, # THAI CHARACTER SARA E + 0x00e1: 0x0e41, # THAI CHARACTER SARA AE + 0x00e2: 0x0e42, # THAI CHARACTER SARA O + 0x00e3: 0x0e43, # THAI CHARACTER SARA AI MAIMUAN + 0x00e4: 0x0e44, # THAI CHARACTER SARA AI MAIMALAI + 0x00e5: 0x0e45, # THAI CHARACTER LAKKHANGYAO + 0x00e6: 0x0e46, # THAI CHARACTER MAIYAMOK + 0x00e7: 0x0e47, # THAI CHARACTER MAITAIKHU + 0x00e8: 0x0e48, # THAI CHARACTER MAI EK + 0x00e9: 0x0e49, # THAI CHARACTER MAI THO + 0x00ea: 0x0e4a, # THAI CHARACTER MAI TRI + 0x00eb: 0x0e4b, # THAI CHARACTER MAI CHATTAWA + 0x00ec: 0x0e4c, # THAI CHARACTER THANTHAKHAT + 0x00ed: 0x0e4d, # THAI CHARACTER NIKHAHIT + 0x00ee: 0x0e4e, # THAI CHARACTER YAMAKKAN + 0x00ef: 0x0e4f, # THAI CHARACTER FONGMAN + 0x00f0: 0x0e50, # THAI DIGIT ZERO + 0x00f1: 0x0e51, # THAI DIGIT ONE + 0x00f2: 0x0e52, # THAI DIGIT TWO + 0x00f3: 0x0e53, # THAI DIGIT THREE + 0x00f4: 0x0e54, # THAI DIGIT FOUR + 0x00f5: 0x0e55, # THAI DIGIT FIVE + 0x00f6: 0x0e56, # THAI DIGIT SIX + 0x00f7: 0x0e57, # THAI DIGIT SEVEN + 0x00f8: 0x0e58, # THAI DIGIT EIGHT + 0x00f9: 0x0e59, # THAI DIGIT NINE + 0x00fa: 0x0e5a, # THAI CHARACTER ANGKHANKHU + 0x00fb: 0x0e5b, # THAI CHARACTER KHOMUT + 0x00fc: None, # UNDEFINED + 0x00fd: None, # UNDEFINED + 0x00fe: None, # UNDEFINED + 0x00ff: None, # UNDEFINED }) ### Encoding Map diff --git a/Lib/encodings/cp875.py b/Lib/encodings/cp875.py index 5e74873..95bb777 100644 --- a/Lib/encodings/cp875.py +++ b/Lib/encodings/cp875.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,243 +37,243 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0004: 0x009c, # CONTROL - 0x0005: 0x0009, # HORIZONTAL TABULATION - 0x0006: 0x0086, # CONTROL - 0x0007: 0x007f, # DELETE - 0x0008: 0x0097, # CONTROL - 0x0009: 0x008d, # CONTROL - 0x000a: 0x008e, # CONTROL - 0x0014: 0x009d, # CONTROL - 0x0015: 0x0085, # CONTROL - 0x0016: 0x0008, # BACKSPACE - 0x0017: 0x0087, # CONTROL - 0x001a: 0x0092, # CONTROL - 0x001b: 0x008f, # CONTROL - 0x0020: 0x0080, # CONTROL - 0x0021: 0x0081, # CONTROL - 0x0022: 0x0082, # CONTROL - 0x0023: 0x0083, # CONTROL - 0x0024: 0x0084, # CONTROL - 0x0025: 0x000a, # LINE FEED - 0x0026: 0x0017, # END OF TRANSMISSION BLOCK - 0x0027: 0x001b, # ESCAPE - 0x0028: 0x0088, # CONTROL - 0x0029: 0x0089, # CONTROL - 0x002a: 0x008a, # CONTROL - 0x002b: 0x008b, # CONTROL - 0x002c: 0x008c, # CONTROL - 0x002d: 0x0005, # ENQUIRY - 0x002e: 0x0006, # ACKNOWLEDGE - 0x002f: 0x0007, # BELL - 0x0030: 0x0090, # CONTROL - 0x0031: 0x0091, # CONTROL - 0x0032: 0x0016, # SYNCHRONOUS IDLE - 0x0033: 0x0093, # CONTROL - 0x0034: 0x0094, # CONTROL - 0x0035: 0x0095, # CONTROL - 0x0036: 0x0096, # CONTROL - 0x0037: 0x0004, # END OF TRANSMISSION - 0x0038: 0x0098, # CONTROL - 0x0039: 0x0099, # CONTROL - 0x003a: 0x009a, # CONTROL - 0x003b: 0x009b, # CONTROL - 0x003c: 0x0014, # DEVICE CONTROL FOUR - 0x003d: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x003e: 0x009e, # CONTROL - 0x003f: 0x001a, # SUBSTITUTE - 0x0040: 0x0020, # SPACE - 0x0041: 0x0391, # GREEK CAPITAL LETTER ALPHA - 0x0042: 0x0392, # GREEK CAPITAL LETTER BETA - 0x0043: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x0044: 0x0394, # GREEK CAPITAL LETTER DELTA - 0x0045: 0x0395, # GREEK CAPITAL LETTER EPSILON - 0x0046: 0x0396, # GREEK CAPITAL LETTER ZETA - 0x0047: 0x0397, # GREEK CAPITAL LETTER ETA - 0x0048: 0x0398, # GREEK CAPITAL LETTER THETA - 0x0049: 0x0399, # GREEK CAPITAL LETTER IOTA - 0x004a: 0x005b, # LEFT SQUARE BRACKET - 0x004b: 0x002e, # FULL STOP - 0x004c: 0x003c, # LESS-THAN SIGN - 0x004d: 0x0028, # LEFT PARENTHESIS - 0x004e: 0x002b, # PLUS SIGN - 0x004f: 0x0021, # EXCLAMATION MARK - 0x0050: 0x0026, # AMPERSAND - 0x0051: 0x039a, # GREEK CAPITAL LETTER KAPPA - 0x0052: 0x039b, # GREEK CAPITAL LETTER LAMDA - 0x0053: 0x039c, # GREEK CAPITAL LETTER MU - 0x0054: 0x039d, # GREEK CAPITAL LETTER NU - 0x0055: 0x039e, # GREEK CAPITAL LETTER XI - 0x0056: 0x039f, # GREEK CAPITAL LETTER OMICRON - 0x0057: 0x03a0, # GREEK CAPITAL LETTER PI - 0x0058: 0x03a1, # GREEK CAPITAL LETTER RHO - 0x0059: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x005a: 0x005d, # RIGHT SQUARE BRACKET - 0x005b: 0x0024, # DOLLAR SIGN - 0x005c: 0x002a, # ASTERISK - 0x005d: 0x0029, # RIGHT PARENTHESIS - 0x005e: 0x003b, # SEMICOLON - 0x005f: 0x005e, # CIRCUMFLEX ACCENT - 0x0060: 0x002d, # HYPHEN-MINUS - 0x0061: 0x002f, # SOLIDUS - 0x0062: 0x03a4, # GREEK CAPITAL LETTER TAU - 0x0063: 0x03a5, # GREEK CAPITAL LETTER UPSILON - 0x0064: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x0065: 0x03a7, # GREEK CAPITAL LETTER CHI - 0x0066: 0x03a8, # GREEK CAPITAL LETTER PSI - 0x0067: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x0068: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x0069: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x006a: 0x007c, # VERTICAL LINE - 0x006b: 0x002c, # COMMA - 0x006c: 0x0025, # PERCENT SIGN - 0x006d: 0x005f, # LOW LINE - 0x006e: 0x003e, # GREATER-THAN SIGN - 0x006f: 0x003f, # QUESTION MARK - 0x0070: 0x00a8, # DIAERESIS - 0x0071: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0072: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x0073: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x0074: 0x00a0, # NO-BREAK SPACE - 0x0075: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x0076: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x0077: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x0078: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0079: 0x0060, # GRAVE ACCENT - 0x007a: 0x003a, # COLON - 0x007b: 0x0023, # NUMBER SIGN - 0x007c: 0x0040, # COMMERCIAL AT - 0x007d: 0x0027, # APOSTROPHE - 0x007e: 0x003d, # EQUALS SIGN - 0x007f: 0x0022, # QUOTATION MARK - 0x0080: 0x0385, # GREEK DIALYTIKA TONOS - 0x0081: 0x0061, # LATIN SMALL LETTER A - 0x0082: 0x0062, # LATIN SMALL LETTER B - 0x0083: 0x0063, # LATIN SMALL LETTER C - 0x0084: 0x0064, # LATIN SMALL LETTER D - 0x0085: 0x0065, # LATIN SMALL LETTER E - 0x0086: 0x0066, # LATIN SMALL LETTER F - 0x0087: 0x0067, # LATIN SMALL LETTER G - 0x0088: 0x0068, # LATIN SMALL LETTER H - 0x0089: 0x0069, # LATIN SMALL LETTER I - 0x008a: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x008b: 0x03b2, # GREEK SMALL LETTER BETA - 0x008c: 0x03b3, # GREEK SMALL LETTER GAMMA - 0x008d: 0x03b4, # GREEK SMALL LETTER DELTA - 0x008e: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x008f: 0x03b6, # GREEK SMALL LETTER ZETA - 0x0090: 0x00b0, # DEGREE SIGN - 0x0091: 0x006a, # LATIN SMALL LETTER J - 0x0092: 0x006b, # LATIN SMALL LETTER K - 0x0093: 0x006c, # LATIN SMALL LETTER L - 0x0094: 0x006d, # LATIN SMALL LETTER M - 0x0095: 0x006e, # LATIN SMALL LETTER N - 0x0096: 0x006f, # LATIN SMALL LETTER O - 0x0097: 0x0070, # LATIN SMALL LETTER P - 0x0098: 0x0071, # LATIN SMALL LETTER Q - 0x0099: 0x0072, # LATIN SMALL LETTER R - 0x009a: 0x03b7, # GREEK SMALL LETTER ETA - 0x009b: 0x03b8, # GREEK SMALL LETTER THETA - 0x009c: 0x03b9, # GREEK SMALL LETTER IOTA - 0x009d: 0x03ba, # GREEK SMALL LETTER KAPPA - 0x009e: 0x03bb, # GREEK SMALL LETTER LAMDA - 0x009f: 0x03bc, # GREEK SMALL LETTER MU - 0x00a0: 0x00b4, # ACUTE ACCENT - 0x00a1: 0x007e, # TILDE - 0x00a2: 0x0073, # LATIN SMALL LETTER S - 0x00a3: 0x0074, # LATIN SMALL LETTER T - 0x00a4: 0x0075, # LATIN SMALL LETTER U - 0x00a5: 0x0076, # LATIN SMALL LETTER V - 0x00a6: 0x0077, # LATIN SMALL LETTER W - 0x00a7: 0x0078, # LATIN SMALL LETTER X - 0x00a8: 0x0079, # LATIN SMALL LETTER Y - 0x00a9: 0x007a, # LATIN SMALL LETTER Z - 0x00aa: 0x03bd, # GREEK SMALL LETTER NU - 0x00ab: 0x03be, # GREEK SMALL LETTER XI - 0x00ac: 0x03bf, # GREEK SMALL LETTER OMICRON - 0x00ad: 0x03c0, # GREEK SMALL LETTER PI - 0x00ae: 0x03c1, # GREEK SMALL LETTER RHO - 0x00af: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00b0: 0x00a3, # POUND SIGN - 0x00b1: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x00b2: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x00b3: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS - 0x00b4: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x00b5: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS - 0x00b6: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x00b7: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x00b8: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x00b9: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x00ba: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA - 0x00bb: 0x03c4, # GREEK SMALL LETTER TAU - 0x00bc: 0x03c5, # GREEK SMALL LETTER UPSILON - 0x00bd: 0x03c6, # GREEK SMALL LETTER PHI - 0x00be: 0x03c7, # GREEK SMALL LETTER CHI - 0x00bf: 0x03c8, # GREEK SMALL LETTER PSI - 0x00c0: 0x007b, # LEFT CURLY BRACKET - 0x00c1: 0x0041, # LATIN CAPITAL LETTER A - 0x00c2: 0x0042, # LATIN CAPITAL LETTER B - 0x00c3: 0x0043, # LATIN CAPITAL LETTER C - 0x00c4: 0x0044, # LATIN CAPITAL LETTER D - 0x00c5: 0x0045, # LATIN CAPITAL LETTER E - 0x00c6: 0x0046, # LATIN CAPITAL LETTER F - 0x00c7: 0x0047, # LATIN CAPITAL LETTER G - 0x00c8: 0x0048, # LATIN CAPITAL LETTER H - 0x00c9: 0x0049, # LATIN CAPITAL LETTER I - 0x00ca: 0x00ad, # SOFT HYPHEN - 0x00cb: 0x03c9, # GREEK SMALL LETTER OMEGA - 0x00cc: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x00cd: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x00ce: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x00cf: 0x2015, # HORIZONTAL BAR - 0x00d0: 0x007d, # RIGHT CURLY BRACKET - 0x00d1: 0x004a, # LATIN CAPITAL LETTER J - 0x00d2: 0x004b, # LATIN CAPITAL LETTER K - 0x00d3: 0x004c, # LATIN CAPITAL LETTER L - 0x00d4: 0x004d, # LATIN CAPITAL LETTER M - 0x00d5: 0x004e, # LATIN CAPITAL LETTER N - 0x00d6: 0x004f, # LATIN CAPITAL LETTER O - 0x00d7: 0x0050, # LATIN CAPITAL LETTER P - 0x00d8: 0x0051, # LATIN CAPITAL LETTER Q - 0x00d9: 0x0052, # LATIN CAPITAL LETTER R - 0x00da: 0x00b1, # PLUS-MINUS SIGN - 0x00db: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00dc: 0x001a, # SUBSTITUTE - 0x00dd: 0x0387, # GREEK ANO TELEIA - 0x00de: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00df: 0x00a6, # BROKEN BAR - 0x00e0: 0x005c, # REVERSE SOLIDUS - 0x00e1: 0x001a, # SUBSTITUTE - 0x00e2: 0x0053, # LATIN CAPITAL LETTER S - 0x00e3: 0x0054, # LATIN CAPITAL LETTER T - 0x00e4: 0x0055, # LATIN CAPITAL LETTER U - 0x00e5: 0x0056, # LATIN CAPITAL LETTER V - 0x00e6: 0x0057, # LATIN CAPITAL LETTER W - 0x00e7: 0x0058, # LATIN CAPITAL LETTER X - 0x00e8: 0x0059, # LATIN CAPITAL LETTER Y - 0x00e9: 0x005a, # LATIN CAPITAL LETTER Z - 0x00ea: 0x00b2, # SUPERSCRIPT TWO - 0x00eb: 0x00a7, # SECTION SIGN - 0x00ec: 0x001a, # SUBSTITUTE - 0x00ed: 0x001a, # SUBSTITUTE - 0x00ee: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ef: 0x00ac, # NOT SIGN - 0x00f0: 0x0030, # DIGIT ZERO - 0x00f1: 0x0031, # DIGIT ONE - 0x00f2: 0x0032, # DIGIT TWO - 0x00f3: 0x0033, # DIGIT THREE - 0x00f4: 0x0034, # DIGIT FOUR - 0x00f5: 0x0035, # DIGIT FIVE - 0x00f6: 0x0036, # DIGIT SIX - 0x00f7: 0x0037, # DIGIT SEVEN - 0x00f8: 0x0038, # DIGIT EIGHT - 0x00f9: 0x0039, # DIGIT NINE - 0x00fa: 0x00b3, # SUPERSCRIPT THREE - 0x00fb: 0x00a9, # COPYRIGHT SIGN - 0x00fc: 0x001a, # SUBSTITUTE - 0x00fd: 0x001a, # SUBSTITUTE - 0x00fe: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ff: 0x009f, # CONTROL + 0x0004: 0x009c, # CONTROL + 0x0005: 0x0009, # HORIZONTAL TABULATION + 0x0006: 0x0086, # CONTROL + 0x0007: 0x007f, # DELETE + 0x0008: 0x0097, # CONTROL + 0x0009: 0x008d, # CONTROL + 0x000a: 0x008e, # CONTROL + 0x0014: 0x009d, # CONTROL + 0x0015: 0x0085, # CONTROL + 0x0016: 0x0008, # BACKSPACE + 0x0017: 0x0087, # CONTROL + 0x001a: 0x0092, # CONTROL + 0x001b: 0x008f, # CONTROL + 0x0020: 0x0080, # CONTROL + 0x0021: 0x0081, # CONTROL + 0x0022: 0x0082, # CONTROL + 0x0023: 0x0083, # CONTROL + 0x0024: 0x0084, # CONTROL + 0x0025: 0x000a, # LINE FEED + 0x0026: 0x0017, # END OF TRANSMISSION BLOCK + 0x0027: 0x001b, # ESCAPE + 0x0028: 0x0088, # CONTROL + 0x0029: 0x0089, # CONTROL + 0x002a: 0x008a, # CONTROL + 0x002b: 0x008b, # CONTROL + 0x002c: 0x008c, # CONTROL + 0x002d: 0x0005, # ENQUIRY + 0x002e: 0x0006, # ACKNOWLEDGE + 0x002f: 0x0007, # BELL + 0x0030: 0x0090, # CONTROL + 0x0031: 0x0091, # CONTROL + 0x0032: 0x0016, # SYNCHRONOUS IDLE + 0x0033: 0x0093, # CONTROL + 0x0034: 0x0094, # CONTROL + 0x0035: 0x0095, # CONTROL + 0x0036: 0x0096, # CONTROL + 0x0037: 0x0004, # END OF TRANSMISSION + 0x0038: 0x0098, # CONTROL + 0x0039: 0x0099, # CONTROL + 0x003a: 0x009a, # CONTROL + 0x003b: 0x009b, # CONTROL + 0x003c: 0x0014, # DEVICE CONTROL FOUR + 0x003d: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x003e: 0x009e, # CONTROL + 0x003f: 0x001a, # SUBSTITUTE + 0x0040: 0x0020, # SPACE + 0x0041: 0x0391, # GREEK CAPITAL LETTER ALPHA + 0x0042: 0x0392, # GREEK CAPITAL LETTER BETA + 0x0043: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x0044: 0x0394, # GREEK CAPITAL LETTER DELTA + 0x0045: 0x0395, # GREEK CAPITAL LETTER EPSILON + 0x0046: 0x0396, # GREEK CAPITAL LETTER ZETA + 0x0047: 0x0397, # GREEK CAPITAL LETTER ETA + 0x0048: 0x0398, # GREEK CAPITAL LETTER THETA + 0x0049: 0x0399, # GREEK CAPITAL LETTER IOTA + 0x004a: 0x005b, # LEFT SQUARE BRACKET + 0x004b: 0x002e, # FULL STOP + 0x004c: 0x003c, # LESS-THAN SIGN + 0x004d: 0x0028, # LEFT PARENTHESIS + 0x004e: 0x002b, # PLUS SIGN + 0x004f: 0x0021, # EXCLAMATION MARK + 0x0050: 0x0026, # AMPERSAND + 0x0051: 0x039a, # GREEK CAPITAL LETTER KAPPA + 0x0052: 0x039b, # GREEK CAPITAL LETTER LAMDA + 0x0053: 0x039c, # GREEK CAPITAL LETTER MU + 0x0054: 0x039d, # GREEK CAPITAL LETTER NU + 0x0055: 0x039e, # GREEK CAPITAL LETTER XI + 0x0056: 0x039f, # GREEK CAPITAL LETTER OMICRON + 0x0057: 0x03a0, # GREEK CAPITAL LETTER PI + 0x0058: 0x03a1, # GREEK CAPITAL LETTER RHO + 0x0059: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x005a: 0x005d, # RIGHT SQUARE BRACKET + 0x005b: 0x0024, # DOLLAR SIGN + 0x005c: 0x002a, # ASTERISK + 0x005d: 0x0029, # RIGHT PARENTHESIS + 0x005e: 0x003b, # SEMICOLON + 0x005f: 0x005e, # CIRCUMFLEX ACCENT + 0x0060: 0x002d, # HYPHEN-MINUS + 0x0061: 0x002f, # SOLIDUS + 0x0062: 0x03a4, # GREEK CAPITAL LETTER TAU + 0x0063: 0x03a5, # GREEK CAPITAL LETTER UPSILON + 0x0064: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x0065: 0x03a7, # GREEK CAPITAL LETTER CHI + 0x0066: 0x03a8, # GREEK CAPITAL LETTER PSI + 0x0067: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x0068: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x0069: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x006a: 0x007c, # VERTICAL LINE + 0x006b: 0x002c, # COMMA + 0x006c: 0x0025, # PERCENT SIGN + 0x006d: 0x005f, # LOW LINE + 0x006e: 0x003e, # GREATER-THAN SIGN + 0x006f: 0x003f, # QUESTION MARK + 0x0070: 0x00a8, # DIAERESIS + 0x0071: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0072: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x0073: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x0074: 0x00a0, # NO-BREAK SPACE + 0x0075: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x0076: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x0077: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x0078: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0079: 0x0060, # GRAVE ACCENT + 0x007a: 0x003a, # COLON + 0x007b: 0x0023, # NUMBER SIGN + 0x007c: 0x0040, # COMMERCIAL AT + 0x007d: 0x0027, # APOSTROPHE + 0x007e: 0x003d, # EQUALS SIGN + 0x007f: 0x0022, # QUOTATION MARK + 0x0080: 0x0385, # GREEK DIALYTIKA TONOS + 0x0081: 0x0061, # LATIN SMALL LETTER A + 0x0082: 0x0062, # LATIN SMALL LETTER B + 0x0083: 0x0063, # LATIN SMALL LETTER C + 0x0084: 0x0064, # LATIN SMALL LETTER D + 0x0085: 0x0065, # LATIN SMALL LETTER E + 0x0086: 0x0066, # LATIN SMALL LETTER F + 0x0087: 0x0067, # LATIN SMALL LETTER G + 0x0088: 0x0068, # LATIN SMALL LETTER H + 0x0089: 0x0069, # LATIN SMALL LETTER I + 0x008a: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x008b: 0x03b2, # GREEK SMALL LETTER BETA + 0x008c: 0x03b3, # GREEK SMALL LETTER GAMMA + 0x008d: 0x03b4, # GREEK SMALL LETTER DELTA + 0x008e: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x008f: 0x03b6, # GREEK SMALL LETTER ZETA + 0x0090: 0x00b0, # DEGREE SIGN + 0x0091: 0x006a, # LATIN SMALL LETTER J + 0x0092: 0x006b, # LATIN SMALL LETTER K + 0x0093: 0x006c, # LATIN SMALL LETTER L + 0x0094: 0x006d, # LATIN SMALL LETTER M + 0x0095: 0x006e, # LATIN SMALL LETTER N + 0x0096: 0x006f, # LATIN SMALL LETTER O + 0x0097: 0x0070, # LATIN SMALL LETTER P + 0x0098: 0x0071, # LATIN SMALL LETTER Q + 0x0099: 0x0072, # LATIN SMALL LETTER R + 0x009a: 0x03b7, # GREEK SMALL LETTER ETA + 0x009b: 0x03b8, # GREEK SMALL LETTER THETA + 0x009c: 0x03b9, # GREEK SMALL LETTER IOTA + 0x009d: 0x03ba, # GREEK SMALL LETTER KAPPA + 0x009e: 0x03bb, # GREEK SMALL LETTER LAMDA + 0x009f: 0x03bc, # GREEK SMALL LETTER MU + 0x00a0: 0x00b4, # ACUTE ACCENT + 0x00a1: 0x007e, # TILDE + 0x00a2: 0x0073, # LATIN SMALL LETTER S + 0x00a3: 0x0074, # LATIN SMALL LETTER T + 0x00a4: 0x0075, # LATIN SMALL LETTER U + 0x00a5: 0x0076, # LATIN SMALL LETTER V + 0x00a6: 0x0077, # LATIN SMALL LETTER W + 0x00a7: 0x0078, # LATIN SMALL LETTER X + 0x00a8: 0x0079, # LATIN SMALL LETTER Y + 0x00a9: 0x007a, # LATIN SMALL LETTER Z + 0x00aa: 0x03bd, # GREEK SMALL LETTER NU + 0x00ab: 0x03be, # GREEK SMALL LETTER XI + 0x00ac: 0x03bf, # GREEK SMALL LETTER OMICRON + 0x00ad: 0x03c0, # GREEK SMALL LETTER PI + 0x00ae: 0x03c1, # GREEK SMALL LETTER RHO + 0x00af: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00b0: 0x00a3, # POUND SIGN + 0x00b1: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x00b2: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x00b3: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS + 0x00b4: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x00b5: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS + 0x00b6: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x00b7: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x00b8: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x00b9: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x00ba: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA + 0x00bb: 0x03c4, # GREEK SMALL LETTER TAU + 0x00bc: 0x03c5, # GREEK SMALL LETTER UPSILON + 0x00bd: 0x03c6, # GREEK SMALL LETTER PHI + 0x00be: 0x03c7, # GREEK SMALL LETTER CHI + 0x00bf: 0x03c8, # GREEK SMALL LETTER PSI + 0x00c0: 0x007b, # LEFT CURLY BRACKET + 0x00c1: 0x0041, # LATIN CAPITAL LETTER A + 0x00c2: 0x0042, # LATIN CAPITAL LETTER B + 0x00c3: 0x0043, # LATIN CAPITAL LETTER C + 0x00c4: 0x0044, # LATIN CAPITAL LETTER D + 0x00c5: 0x0045, # LATIN CAPITAL LETTER E + 0x00c6: 0x0046, # LATIN CAPITAL LETTER F + 0x00c7: 0x0047, # LATIN CAPITAL LETTER G + 0x00c8: 0x0048, # LATIN CAPITAL LETTER H + 0x00c9: 0x0049, # LATIN CAPITAL LETTER I + 0x00ca: 0x00ad, # SOFT HYPHEN + 0x00cb: 0x03c9, # GREEK SMALL LETTER OMEGA + 0x00cc: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x00cd: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x00ce: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x00cf: 0x2015, # HORIZONTAL BAR + 0x00d0: 0x007d, # RIGHT CURLY BRACKET + 0x00d1: 0x004a, # LATIN CAPITAL LETTER J + 0x00d2: 0x004b, # LATIN CAPITAL LETTER K + 0x00d3: 0x004c, # LATIN CAPITAL LETTER L + 0x00d4: 0x004d, # LATIN CAPITAL LETTER M + 0x00d5: 0x004e, # LATIN CAPITAL LETTER N + 0x00d6: 0x004f, # LATIN CAPITAL LETTER O + 0x00d7: 0x0050, # LATIN CAPITAL LETTER P + 0x00d8: 0x0051, # LATIN CAPITAL LETTER Q + 0x00d9: 0x0052, # LATIN CAPITAL LETTER R + 0x00da: 0x00b1, # PLUS-MINUS SIGN + 0x00db: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00dc: 0x001a, # SUBSTITUTE + 0x00dd: 0x0387, # GREEK ANO TELEIA + 0x00de: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00df: 0x00a6, # BROKEN BAR + 0x00e0: 0x005c, # REVERSE SOLIDUS + 0x00e1: 0x001a, # SUBSTITUTE + 0x00e2: 0x0053, # LATIN CAPITAL LETTER S + 0x00e3: 0x0054, # LATIN CAPITAL LETTER T + 0x00e4: 0x0055, # LATIN CAPITAL LETTER U + 0x00e5: 0x0056, # LATIN CAPITAL LETTER V + 0x00e6: 0x0057, # LATIN CAPITAL LETTER W + 0x00e7: 0x0058, # LATIN CAPITAL LETTER X + 0x00e8: 0x0059, # LATIN CAPITAL LETTER Y + 0x00e9: 0x005a, # LATIN CAPITAL LETTER Z + 0x00ea: 0x00b2, # SUPERSCRIPT TWO + 0x00eb: 0x00a7, # SECTION SIGN + 0x00ec: 0x001a, # SUBSTITUTE + 0x00ed: 0x001a, # SUBSTITUTE + 0x00ee: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ef: 0x00ac, # NOT SIGN + 0x00f0: 0x0030, # DIGIT ZERO + 0x00f1: 0x0031, # DIGIT ONE + 0x00f2: 0x0032, # DIGIT TWO + 0x00f3: 0x0033, # DIGIT THREE + 0x00f4: 0x0034, # DIGIT FOUR + 0x00f5: 0x0035, # DIGIT FIVE + 0x00f6: 0x0036, # DIGIT SIX + 0x00f7: 0x0037, # DIGIT SEVEN + 0x00f8: 0x0038, # DIGIT EIGHT + 0x00f9: 0x0039, # DIGIT NINE + 0x00fa: 0x00b3, # SUPERSCRIPT THREE + 0x00fb: 0x00a9, # COPYRIGHT SIGN + 0x00fc: 0x001a, # SUBSTITUTE + 0x00fd: 0x001a, # SUBSTITUTE + 0x00fe: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ff: 0x009f, # CONTROL }) ### Encoding Map diff --git a/Lib/encodings/hex_codec.py b/Lib/encodings/hex_codec.py index 572ff79..5c6e4a4 100644 --- a/Lib/encodings/hex_codec.py +++ b/Lib/encodings/hex_codec.py @@ -51,7 +51,7 @@ class Codec(codecs.Codec): class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass diff --git a/Lib/encodings/iso8859_1.py b/Lib/encodings/iso8859_1.py index cea2261..9fa2fcc 100644 --- a/Lib/encodings/iso8859_1.py +++ b/Lib/encodings/iso8859_1.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass diff --git a/Lib/encodings/iso8859_10.py b/Lib/encodings/iso8859_10.py index f29ebd2..df2565a 100644 --- a/Lib/encodings/iso8859_10.py +++ b/Lib/encodings/iso8859_10.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,52 +37,52 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00a2: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON - 0x00a3: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x00a4: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON - 0x00a5: 0x0128, # LATIN CAPITAL LETTER I WITH TILDE - 0x00a6: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x00a8: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x00a9: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE - 0x00aa: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00ab: 0x0166, # LATIN CAPITAL LETTER T WITH STROKE - 0x00ac: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00ae: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON - 0x00af: 0x014a, # LATIN CAPITAL LETTER ENG - 0x00b1: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00b2: 0x0113, # LATIN SMALL LETTER E WITH MACRON - 0x00b3: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA - 0x00b4: 0x012b, # LATIN SMALL LETTER I WITH MACRON - 0x00b5: 0x0129, # LATIN SMALL LETTER I WITH TILDE - 0x00b6: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA - 0x00b8: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA - 0x00b9: 0x0111, # LATIN SMALL LETTER D WITH STROKE - 0x00ba: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00bb: 0x0167, # LATIN SMALL LETTER T WITH STROKE - 0x00bc: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00bd: 0x2015, # HORIZONTAL BAR - 0x00be: 0x016b, # LATIN SMALL LETTER U WITH MACRON - 0x00bf: 0x014b, # LATIN SMALL LETTER ENG - 0x00c0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON - 0x00c7: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK - 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00cc: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x00d1: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x00d2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON - 0x00d7: 0x0168, # LATIN CAPITAL LETTER U WITH TILDE - 0x00d9: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK - 0x00e0: 0x0101, # LATIN SMALL LETTER A WITH MACRON - 0x00e7: 0x012f, # LATIN SMALL LETTER I WITH OGONEK - 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00ec: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x00f1: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA - 0x00f2: 0x014d, # LATIN SMALL LETTER O WITH MACRON - 0x00f7: 0x0169, # LATIN SMALL LETTER U WITH TILDE - 0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK - 0x00ff: 0x0138, # LATIN SMALL LETTER KRA + 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00a2: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON + 0x00a3: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x00a4: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON + 0x00a5: 0x0128, # LATIN CAPITAL LETTER I WITH TILDE + 0x00a6: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x00a8: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x00a9: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE + 0x00aa: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00ab: 0x0166, # LATIN CAPITAL LETTER T WITH STROKE + 0x00ac: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00ae: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON + 0x00af: 0x014a, # LATIN CAPITAL LETTER ENG + 0x00b1: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00b2: 0x0113, # LATIN SMALL LETTER E WITH MACRON + 0x00b3: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA + 0x00b4: 0x012b, # LATIN SMALL LETTER I WITH MACRON + 0x00b5: 0x0129, # LATIN SMALL LETTER I WITH TILDE + 0x00b6: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA + 0x00b8: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA + 0x00b9: 0x0111, # LATIN SMALL LETTER D WITH STROKE + 0x00ba: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00bb: 0x0167, # LATIN SMALL LETTER T WITH STROKE + 0x00bc: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00bd: 0x2015, # HORIZONTAL BAR + 0x00be: 0x016b, # LATIN SMALL LETTER U WITH MACRON + 0x00bf: 0x014b, # LATIN SMALL LETTER ENG + 0x00c0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON + 0x00c7: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK + 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00cc: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x00d1: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x00d2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON + 0x00d7: 0x0168, # LATIN CAPITAL LETTER U WITH TILDE + 0x00d9: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK + 0x00e0: 0x0101, # LATIN SMALL LETTER A WITH MACRON + 0x00e7: 0x012f, # LATIN SMALL LETTER I WITH OGONEK + 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00ec: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x00f1: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA + 0x00f2: 0x014d, # LATIN SMALL LETTER O WITH MACRON + 0x00f7: 0x0169, # LATIN SMALL LETTER U WITH TILDE + 0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK + 0x00ff: 0x0138, # LATIN SMALL LETTER KRA }) ### Encoding Map diff --git a/Lib/encodings/iso8859_13.py b/Lib/encodings/iso8859_13.py index e0c4ae4..57b6c70 100644 --- a/Lib/encodings/iso8859_13.py +++ b/Lib/encodings/iso8859_13.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,62 +37,62 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00a5: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00aa: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x00af: 0x00c6, # LATIN CAPITAL LETTER AE - 0x00b4: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00b8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x00ba: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA - 0x00bf: 0x00e6, # LATIN SMALL LETTER AE - 0x00c0: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00c1: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK - 0x00c2: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON - 0x00c3: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x00c6: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00c7: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON - 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00ca: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x00cb: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x00cc: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x00cd: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x00ce: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON - 0x00cf: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x00d0: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00d2: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x00d4: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON - 0x00d8: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK - 0x00d9: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x00da: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x00db: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON - 0x00dd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00de: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00e0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00e1: 0x012f, # LATIN SMALL LETTER I WITH OGONEK - 0x00e2: 0x0101, # LATIN SMALL LETTER A WITH MACRON - 0x00e3: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x00e6: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00e7: 0x0113, # LATIN SMALL LETTER E WITH MACRON - 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00ea: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00eb: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x00ec: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA - 0x00ed: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA - 0x00ee: 0x012b, # LATIN SMALL LETTER I WITH MACRON - 0x00ef: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA - 0x00f0: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00f2: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA - 0x00f4: 0x014d, # LATIN SMALL LETTER O WITH MACRON - 0x00f8: 0x0173, # LATIN SMALL LETTER U WITH OGONEK - 0x00f9: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x00fa: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x00fb: 0x016b, # LATIN SMALL LETTER U WITH MACRON - 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00ff: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00a1: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00a5: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00aa: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x00af: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00b4: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00b8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00ba: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA + 0x00bf: 0x00e6, # LATIN SMALL LETTER AE + 0x00c0: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00c1: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK + 0x00c2: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON + 0x00c3: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x00c6: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00c7: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON + 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00ca: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x00cb: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x00cc: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x00cd: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x00ce: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON + 0x00cf: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x00d0: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00d2: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x00d4: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON + 0x00d8: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK + 0x00d9: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE + 0x00da: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x00db: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON + 0x00dd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00de: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00e0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00e1: 0x012f, # LATIN SMALL LETTER I WITH OGONEK + 0x00e2: 0x0101, # LATIN SMALL LETTER A WITH MACRON + 0x00e3: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x00e6: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00e7: 0x0113, # LATIN SMALL LETTER E WITH MACRON + 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00ea: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x00eb: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x00ec: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA + 0x00ed: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA + 0x00ee: 0x012b, # LATIN SMALL LETTER I WITH MACRON + 0x00ef: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA + 0x00f0: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00f2: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA + 0x00f4: 0x014d, # LATIN SMALL LETTER O WITH MACRON + 0x00f8: 0x0173, # LATIN SMALL LETTER U WITH OGONEK + 0x00f9: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x00fa: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x00fb: 0x016b, # LATIN SMALL LETTER U WITH MACRON + 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00ff: 0x2019, # RIGHT SINGLE QUOTATION MARK }) ### Encoding Map diff --git a/Lib/encodings/iso8859_14.py b/Lib/encodings/iso8859_14.py index c84bb23..9f3d6fe 100644 --- a/Lib/encodings/iso8859_14.py +++ b/Lib/encodings/iso8859_14.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,37 +37,37 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x1e02, # LATIN CAPITAL LETTER B WITH DOT ABOVE - 0x00a2: 0x1e03, # LATIN SMALL LETTER B WITH DOT ABOVE - 0x00a4: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE - 0x00a5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE - 0x00a6: 0x1e0a, # LATIN CAPITAL LETTER D WITH DOT ABOVE - 0x00a8: 0x1e80, # LATIN CAPITAL LETTER W WITH GRAVE - 0x00aa: 0x1e82, # LATIN CAPITAL LETTER W WITH ACUTE - 0x00ab: 0x1e0b, # LATIN SMALL LETTER D WITH DOT ABOVE - 0x00ac: 0x1ef2, # LATIN CAPITAL LETTER Y WITH GRAVE - 0x00af: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x00b0: 0x1e1e, # LATIN CAPITAL LETTER F WITH DOT ABOVE - 0x00b1: 0x1e1f, # LATIN SMALL LETTER F WITH DOT ABOVE - 0x00b2: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE - 0x00b3: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE - 0x00b4: 0x1e40, # LATIN CAPITAL LETTER M WITH DOT ABOVE - 0x00b5: 0x1e41, # LATIN SMALL LETTER M WITH DOT ABOVE - 0x00b7: 0x1e56, # LATIN CAPITAL LETTER P WITH DOT ABOVE - 0x00b8: 0x1e81, # LATIN SMALL LETTER W WITH GRAVE - 0x00b9: 0x1e57, # LATIN SMALL LETTER P WITH DOT ABOVE - 0x00ba: 0x1e83, # LATIN SMALL LETTER W WITH ACUTE - 0x00bb: 0x1e60, # LATIN CAPITAL LETTER S WITH DOT ABOVE - 0x00bc: 0x1ef3, # LATIN SMALL LETTER Y WITH GRAVE - 0x00bd: 0x1e84, # LATIN CAPITAL LETTER W WITH DIAERESIS - 0x00be: 0x1e85, # LATIN SMALL LETTER W WITH DIAERESIS - 0x00bf: 0x1e61, # LATIN SMALL LETTER S WITH DOT ABOVE - 0x00d0: 0x0174, # LATIN CAPITAL LETTER W WITH CIRCUMFLEX - 0x00d7: 0x1e6a, # LATIN CAPITAL LETTER T WITH DOT ABOVE - 0x00de: 0x0176, # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX - 0x00f0: 0x0175, # LATIN SMALL LETTER W WITH CIRCUMFLEX - 0x00f7: 0x1e6b, # LATIN SMALL LETTER T WITH DOT ABOVE - 0x00fe: 0x0177, # LATIN SMALL LETTER Y WITH CIRCUMFLEX + 0x00a1: 0x1e02, # LATIN CAPITAL LETTER B WITH DOT ABOVE + 0x00a2: 0x1e03, # LATIN SMALL LETTER B WITH DOT ABOVE + 0x00a4: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE + 0x00a5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE + 0x00a6: 0x1e0a, # LATIN CAPITAL LETTER D WITH DOT ABOVE + 0x00a8: 0x1e80, # LATIN CAPITAL LETTER W WITH GRAVE + 0x00aa: 0x1e82, # LATIN CAPITAL LETTER W WITH ACUTE + 0x00ab: 0x1e0b, # LATIN SMALL LETTER D WITH DOT ABOVE + 0x00ac: 0x1ef2, # LATIN CAPITAL LETTER Y WITH GRAVE + 0x00af: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x00b0: 0x1e1e, # LATIN CAPITAL LETTER F WITH DOT ABOVE + 0x00b1: 0x1e1f, # LATIN SMALL LETTER F WITH DOT ABOVE + 0x00b2: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE + 0x00b3: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE + 0x00b4: 0x1e40, # LATIN CAPITAL LETTER M WITH DOT ABOVE + 0x00b5: 0x1e41, # LATIN SMALL LETTER M WITH DOT ABOVE + 0x00b7: 0x1e56, # LATIN CAPITAL LETTER P WITH DOT ABOVE + 0x00b8: 0x1e81, # LATIN SMALL LETTER W WITH GRAVE + 0x00b9: 0x1e57, # LATIN SMALL LETTER P WITH DOT ABOVE + 0x00ba: 0x1e83, # LATIN SMALL LETTER W WITH ACUTE + 0x00bb: 0x1e60, # LATIN CAPITAL LETTER S WITH DOT ABOVE + 0x00bc: 0x1ef3, # LATIN SMALL LETTER Y WITH GRAVE + 0x00bd: 0x1e84, # LATIN CAPITAL LETTER W WITH DIAERESIS + 0x00be: 0x1e85, # LATIN SMALL LETTER W WITH DIAERESIS + 0x00bf: 0x1e61, # LATIN SMALL LETTER S WITH DOT ABOVE + 0x00d0: 0x0174, # LATIN CAPITAL LETTER W WITH CIRCUMFLEX + 0x00d7: 0x1e6a, # LATIN CAPITAL LETTER T WITH DOT ABOVE + 0x00de: 0x0176, # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX + 0x00f0: 0x0175, # LATIN SMALL LETTER W WITH CIRCUMFLEX + 0x00f7: 0x1e6b, # LATIN SMALL LETTER T WITH DOT ABOVE + 0x00fe: 0x0177, # LATIN SMALL LETTER Y WITH CIRCUMFLEX }) ### Encoding Map diff --git a/Lib/encodings/iso8859_15.py b/Lib/encodings/iso8859_15.py index bf92acb..5708df0 100644 --- a/Lib/encodings/iso8859_15.py +++ b/Lib/encodings/iso8859_15.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,14 +37,14 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a4: 0x20ac, # EURO SIGN - 0x00a6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00a8: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00b4: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00b8: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00bc: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x00bd: 0x0153, # LATIN SMALL LIGATURE OE - 0x00be: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x00a4: 0x20ac, # EURO SIGN + 0x00a6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00a8: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00b4: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00b8: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00bc: 0x0152, # LATIN CAPITAL LIGATURE OE + 0x00bd: 0x0153, # LATIN SMALL LIGATURE OE + 0x00be: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS }) ### Encoding Map diff --git a/Lib/encodings/iso8859_2.py b/Lib/encodings/iso8859_2.py index 1c92a54..c9ac2c2 100644 --- a/Lib/encodings/iso8859_2.py +++ b/Lib/encodings/iso8859_2.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,63 +37,63 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00a2: 0x02d8, # BREVE - 0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x00a5: 0x013d, # LATIN CAPITAL LETTER L WITH CARON - 0x00a6: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x00a9: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x00ab: 0x0164, # LATIN CAPITAL LETTER T WITH CARON - 0x00ac: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x00ae: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00b1: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00b2: 0x02db, # OGONEK - 0x00b3: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x00b5: 0x013e, # LATIN SMALL LETTER L WITH CARON - 0x00b6: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x00b7: 0x02c7, # CARON - 0x00b9: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA - 0x00bb: 0x0165, # LATIN SMALL LETTER T WITH CARON - 0x00bc: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00bd: 0x02dd, # DOUBLE ACUTE ACCENT - 0x00be: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00c0: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE - 0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE - 0x00c5: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE - 0x00c6: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00cc: 0x011a, # LATIN CAPITAL LETTER E WITH CARON - 0x00cf: 0x010e, # LATIN CAPITAL LETTER D WITH CARON - 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE - 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00d2: 0x0147, # LATIN CAPITAL LETTER N WITH CARON - 0x00d5: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x00d8: 0x0158, # LATIN CAPITAL LETTER R WITH CARON - 0x00d9: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x00db: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x00de: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA - 0x00e0: 0x0155, # LATIN SMALL LETTER R WITH ACUTE - 0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE - 0x00e5: 0x013a, # LATIN SMALL LETTER L WITH ACUTE - 0x00e6: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00ec: 0x011b, # LATIN SMALL LETTER E WITH CARON - 0x00ef: 0x010f, # LATIN SMALL LETTER D WITH CARON - 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE - 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00f2: 0x0148, # LATIN SMALL LETTER N WITH CARON - 0x00f5: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x00f8: 0x0159, # LATIN SMALL LETTER R WITH CARON - 0x00f9: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE - 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA - 0x00ff: 0x02d9, # DOT ABOVE + 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00a2: 0x02d8, # BREVE + 0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE + 0x00a5: 0x013d, # LATIN CAPITAL LETTER L WITH CARON + 0x00a6: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x00a9: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x00ab: 0x0164, # LATIN CAPITAL LETTER T WITH CARON + 0x00ac: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x00ae: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00b1: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00b2: 0x02db, # OGONEK + 0x00b3: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x00b5: 0x013e, # LATIN SMALL LETTER L WITH CARON + 0x00b6: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x00b7: 0x02c7, # CARON + 0x00b9: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00bb: 0x0165, # LATIN SMALL LETTER T WITH CARON + 0x00bc: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x00bd: 0x02dd, # DOUBLE ACUTE ACCENT + 0x00be: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00c0: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE + 0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE + 0x00c5: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE + 0x00c6: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00cc: 0x011a, # LATIN CAPITAL LETTER E WITH CARON + 0x00cf: 0x010e, # LATIN CAPITAL LETTER D WITH CARON + 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE + 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00d2: 0x0147, # LATIN CAPITAL LETTER N WITH CARON + 0x00d5: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x00d8: 0x0158, # LATIN CAPITAL LETTER R WITH CARON + 0x00d9: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x00db: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x00de: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA + 0x00e0: 0x0155, # LATIN SMALL LETTER R WITH ACUTE + 0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE + 0x00e5: 0x013a, # LATIN SMALL LETTER L WITH ACUTE + 0x00e6: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00ec: 0x011b, # LATIN SMALL LETTER E WITH CARON + 0x00ef: 0x010f, # LATIN SMALL LETTER D WITH CARON + 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE + 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00f2: 0x0148, # LATIN SMALL LETTER N WITH CARON + 0x00f5: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x00f8: 0x0159, # LATIN SMALL LETTER R WITH CARON + 0x00f9: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE + 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA + 0x00ff: 0x02d9, # DOT ABOVE }) ### Encoding Map diff --git a/Lib/encodings/iso8859_3.py b/Lib/encodings/iso8859_3.py index ac8c8d9..776423e 100644 --- a/Lib/encodings/iso8859_3.py +++ b/Lib/encodings/iso8859_3.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,41 +37,41 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x0126, # LATIN CAPITAL LETTER H WITH STROKE - 0x00a2: 0x02d8, # BREVE - 0x00a5: None, - 0x00a6: 0x0124, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX - 0x00a9: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x00ab: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE - 0x00ac: 0x0134, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX - 0x00ae: None, - 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00b1: 0x0127, # LATIN SMALL LETTER H WITH STROKE - 0x00b6: 0x0125, # LATIN SMALL LETTER H WITH CIRCUMFLEX - 0x00b9: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA - 0x00bb: 0x011f, # LATIN SMALL LETTER G WITH BREVE - 0x00bc: 0x0135, # LATIN SMALL LETTER J WITH CIRCUMFLEX - 0x00be: None, - 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00c3: None, - 0x00c5: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE - 0x00c6: 0x0108, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX - 0x00d0: None, - 0x00d5: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE - 0x00d8: 0x011c, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX - 0x00dd: 0x016c, # LATIN CAPITAL LETTER U WITH BREVE - 0x00de: 0x015c, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX - 0x00e3: None, - 0x00e5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE - 0x00e6: 0x0109, # LATIN SMALL LETTER C WITH CIRCUMFLEX - 0x00f0: None, - 0x00f5: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE - 0x00f8: 0x011d, # LATIN SMALL LETTER G WITH CIRCUMFLEX - 0x00fd: 0x016d, # LATIN SMALL LETTER U WITH BREVE - 0x00fe: 0x015d, # LATIN SMALL LETTER S WITH CIRCUMFLEX - 0x00ff: 0x02d9, # DOT ABOVE + 0x00a1: 0x0126, # LATIN CAPITAL LETTER H WITH STROKE + 0x00a2: 0x02d8, # BREVE + 0x00a5: None, + 0x00a6: 0x0124, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX + 0x00a9: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x00ab: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE + 0x00ac: 0x0134, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX + 0x00ae: None, + 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00b1: 0x0127, # LATIN SMALL LETTER H WITH STROKE + 0x00b6: 0x0125, # LATIN SMALL LETTER H WITH CIRCUMFLEX + 0x00b9: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00bb: 0x011f, # LATIN SMALL LETTER G WITH BREVE + 0x00bc: 0x0135, # LATIN SMALL LETTER J WITH CIRCUMFLEX + 0x00be: None, + 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00c3: None, + 0x00c5: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE + 0x00c6: 0x0108, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX + 0x00d0: None, + 0x00d5: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE + 0x00d8: 0x011c, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX + 0x00dd: 0x016c, # LATIN CAPITAL LETTER U WITH BREVE + 0x00de: 0x015c, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX + 0x00e3: None, + 0x00e5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE + 0x00e6: 0x0109, # LATIN SMALL LETTER C WITH CIRCUMFLEX + 0x00f0: None, + 0x00f5: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE + 0x00f8: 0x011d, # LATIN SMALL LETTER G WITH CIRCUMFLEX + 0x00fd: 0x016d, # LATIN SMALL LETTER U WITH BREVE + 0x00fe: 0x015d, # LATIN SMALL LETTER S WITH CIRCUMFLEX + 0x00ff: 0x02d9, # DOT ABOVE }) ### Encoding Map diff --git a/Lib/encodings/iso8859_4.py b/Lib/encodings/iso8859_4.py index 91db126..3f84965 100644 --- a/Lib/encodings/iso8859_4.py +++ b/Lib/encodings/iso8859_4.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,56 +37,56 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00a2: 0x0138, # LATIN SMALL LETTER KRA - 0x00a3: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x00a5: 0x0128, # LATIN CAPITAL LETTER I WITH TILDE - 0x00a6: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x00a9: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00aa: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON - 0x00ab: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x00ac: 0x0166, # LATIN CAPITAL LETTER T WITH STROKE - 0x00ae: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00b1: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00b2: 0x02db, # OGONEK - 0x00b3: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA - 0x00b5: 0x0129, # LATIN SMALL LETTER I WITH TILDE - 0x00b6: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA - 0x00b7: 0x02c7, # CARON - 0x00b9: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00ba: 0x0113, # LATIN SMALL LETTER E WITH MACRON - 0x00bb: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA - 0x00bc: 0x0167, # LATIN SMALL LETTER T WITH STROKE - 0x00bd: 0x014a, # LATIN CAPITAL LETTER ENG - 0x00be: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00bf: 0x014b, # LATIN SMALL LETTER ENG - 0x00c0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON - 0x00c7: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK - 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00cc: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x00cf: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON - 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE - 0x00d1: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x00d2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON - 0x00d3: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x00d9: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK - 0x00dd: 0x0168, # LATIN CAPITAL LETTER U WITH TILDE - 0x00de: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON - 0x00e0: 0x0101, # LATIN SMALL LETTER A WITH MACRON - 0x00e7: 0x012f, # LATIN SMALL LETTER I WITH OGONEK - 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00ec: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x00ef: 0x012b, # LATIN SMALL LETTER I WITH MACRON - 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE - 0x00f1: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA - 0x00f2: 0x014d, # LATIN SMALL LETTER O WITH MACRON - 0x00f3: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA - 0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK - 0x00fd: 0x0169, # LATIN SMALL LETTER U WITH TILDE - 0x00fe: 0x016b, # LATIN SMALL LETTER U WITH MACRON - 0x00ff: 0x02d9, # DOT ABOVE + 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00a2: 0x0138, # LATIN SMALL LETTER KRA + 0x00a3: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x00a5: 0x0128, # LATIN CAPITAL LETTER I WITH TILDE + 0x00a6: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x00a9: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00aa: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON + 0x00ab: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x00ac: 0x0166, # LATIN CAPITAL LETTER T WITH STROKE + 0x00ae: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00b1: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00b2: 0x02db, # OGONEK + 0x00b3: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA + 0x00b5: 0x0129, # LATIN SMALL LETTER I WITH TILDE + 0x00b6: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA + 0x00b7: 0x02c7, # CARON + 0x00b9: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00ba: 0x0113, # LATIN SMALL LETTER E WITH MACRON + 0x00bb: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA + 0x00bc: 0x0167, # LATIN SMALL LETTER T WITH STROKE + 0x00bd: 0x014a, # LATIN CAPITAL LETTER ENG + 0x00be: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00bf: 0x014b, # LATIN SMALL LETTER ENG + 0x00c0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON + 0x00c7: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK + 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00cc: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x00cf: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON + 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE + 0x00d1: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x00d2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON + 0x00d3: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x00d9: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK + 0x00dd: 0x0168, # LATIN CAPITAL LETTER U WITH TILDE + 0x00de: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON + 0x00e0: 0x0101, # LATIN SMALL LETTER A WITH MACRON + 0x00e7: 0x012f, # LATIN SMALL LETTER I WITH OGONEK + 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00ec: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x00ef: 0x012b, # LATIN SMALL LETTER I WITH MACRON + 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE + 0x00f1: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA + 0x00f2: 0x014d, # LATIN SMALL LETTER O WITH MACRON + 0x00f3: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA + 0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK + 0x00fd: 0x0169, # LATIN SMALL LETTER U WITH TILDE + 0x00fe: 0x016b, # LATIN SMALL LETTER U WITH MACRON + 0x00ff: 0x02d9, # DOT ABOVE }) ### Encoding Map diff --git a/Lib/encodings/iso8859_5.py b/Lib/encodings/iso8859_5.py index 96cb073..ac9a842 100644 --- a/Lib/encodings/iso8859_5.py +++ b/Lib/encodings/iso8859_5.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,100 +37,100 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x00a2: 0x0402, # CYRILLIC CAPITAL LETTER DJE - 0x00a3: 0x0403, # CYRILLIC CAPITAL LETTER GJE - 0x00a4: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x00a5: 0x0405, # CYRILLIC CAPITAL LETTER DZE - 0x00a6: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x00a7: 0x0407, # CYRILLIC CAPITAL LETTER YI - 0x00a8: 0x0408, # CYRILLIC CAPITAL LETTER JE - 0x00a9: 0x0409, # CYRILLIC CAPITAL LETTER LJE - 0x00aa: 0x040a, # CYRILLIC CAPITAL LETTER NJE - 0x00ab: 0x040b, # CYRILLIC CAPITAL LETTER TSHE - 0x00ac: 0x040c, # CYRILLIC CAPITAL LETTER KJE - 0x00ae: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U - 0x00af: 0x040f, # CYRILLIC CAPITAL LETTER DZHE - 0x00b0: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x00b1: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x00b2: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x00b3: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x00b4: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x00b5: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x00b6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x00b7: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x00b8: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x00b9: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x00ba: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x00bb: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x00bc: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x00bd: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x00be: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x00bf: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x00c0: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x00c1: 0x0421, # CYRILLIC CAPITAL LETTER ES - 0x00c2: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x00c3: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x00c4: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x00c5: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x00c6: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x00c7: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x00c8: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x00c9: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x00ca: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x00cb: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x00cc: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x00cd: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x00ce: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x00cf: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00d0: 0x0430, # CYRILLIC SMALL LETTER A - 0x00d1: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00d2: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00d3: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00d4: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00d5: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00d6: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00d7: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00d8: 0x0438, # CYRILLIC SMALL LETTER I - 0x00d9: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00da: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00db: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00dc: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00dd: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00de: 0x043e, # CYRILLIC SMALL LETTER O - 0x00df: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00e3: 0x0443, # CYRILLIC SMALL LETTER U - 0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF - 0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00ed: 0x044d, # CYRILLIC SMALL LETTER E - 0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU - 0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA - 0x00f0: 0x2116, # NUMERO SIGN - 0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO - 0x00f2: 0x0452, # CYRILLIC SMALL LETTER DJE - 0x00f3: 0x0453, # CYRILLIC SMALL LETTER GJE - 0x00f4: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x00f5: 0x0455, # CYRILLIC SMALL LETTER DZE - 0x00f6: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x00f7: 0x0457, # CYRILLIC SMALL LETTER YI - 0x00f8: 0x0458, # CYRILLIC SMALL LETTER JE - 0x00f9: 0x0459, # CYRILLIC SMALL LETTER LJE - 0x00fa: 0x045a, # CYRILLIC SMALL LETTER NJE - 0x00fb: 0x045b, # CYRILLIC SMALL LETTER TSHE - 0x00fc: 0x045c, # CYRILLIC SMALL LETTER KJE - 0x00fd: 0x00a7, # SECTION SIGN - 0x00fe: 0x045e, # CYRILLIC SMALL LETTER SHORT U - 0x00ff: 0x045f, # CYRILLIC SMALL LETTER DZHE + 0x00a1: 0x0401, # CYRILLIC CAPITAL LETTER IO + 0x00a2: 0x0402, # CYRILLIC CAPITAL LETTER DJE + 0x00a3: 0x0403, # CYRILLIC CAPITAL LETTER GJE + 0x00a4: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x00a5: 0x0405, # CYRILLIC CAPITAL LETTER DZE + 0x00a6: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x00a7: 0x0407, # CYRILLIC CAPITAL LETTER YI + 0x00a8: 0x0408, # CYRILLIC CAPITAL LETTER JE + 0x00a9: 0x0409, # CYRILLIC CAPITAL LETTER LJE + 0x00aa: 0x040a, # CYRILLIC CAPITAL LETTER NJE + 0x00ab: 0x040b, # CYRILLIC CAPITAL LETTER TSHE + 0x00ac: 0x040c, # CYRILLIC CAPITAL LETTER KJE + 0x00ae: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U + 0x00af: 0x040f, # CYRILLIC CAPITAL LETTER DZHE + 0x00b0: 0x0410, # CYRILLIC CAPITAL LETTER A + 0x00b1: 0x0411, # CYRILLIC CAPITAL LETTER BE + 0x00b2: 0x0412, # CYRILLIC CAPITAL LETTER VE + 0x00b3: 0x0413, # CYRILLIC CAPITAL LETTER GHE + 0x00b4: 0x0414, # CYRILLIC CAPITAL LETTER DE + 0x00b5: 0x0415, # CYRILLIC CAPITAL LETTER IE + 0x00b6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE + 0x00b7: 0x0417, # CYRILLIC CAPITAL LETTER ZE + 0x00b8: 0x0418, # CYRILLIC CAPITAL LETTER I + 0x00b9: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I + 0x00ba: 0x041a, # CYRILLIC CAPITAL LETTER KA + 0x00bb: 0x041b, # CYRILLIC CAPITAL LETTER EL + 0x00bc: 0x041c, # CYRILLIC CAPITAL LETTER EM + 0x00bd: 0x041d, # CYRILLIC CAPITAL LETTER EN + 0x00be: 0x041e, # CYRILLIC CAPITAL LETTER O + 0x00bf: 0x041f, # CYRILLIC CAPITAL LETTER PE + 0x00c0: 0x0420, # CYRILLIC CAPITAL LETTER ER + 0x00c1: 0x0421, # CYRILLIC CAPITAL LETTER ES + 0x00c2: 0x0422, # CYRILLIC CAPITAL LETTER TE + 0x00c3: 0x0423, # CYRILLIC CAPITAL LETTER U + 0x00c4: 0x0424, # CYRILLIC CAPITAL LETTER EF + 0x00c5: 0x0425, # CYRILLIC CAPITAL LETTER HA + 0x00c6: 0x0426, # CYRILLIC CAPITAL LETTER TSE + 0x00c7: 0x0427, # CYRILLIC CAPITAL LETTER CHE + 0x00c8: 0x0428, # CYRILLIC CAPITAL LETTER SHA + 0x00c9: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA + 0x00ca: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x00cb: 0x042b, # CYRILLIC CAPITAL LETTER YERU + 0x00cc: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x00cd: 0x042d, # CYRILLIC CAPITAL LETTER E + 0x00ce: 0x042e, # CYRILLIC CAPITAL LETTER YU + 0x00cf: 0x042f, # CYRILLIC CAPITAL LETTER YA + 0x00d0: 0x0430, # CYRILLIC SMALL LETTER A + 0x00d1: 0x0431, # CYRILLIC SMALL LETTER BE + 0x00d2: 0x0432, # CYRILLIC SMALL LETTER VE + 0x00d3: 0x0433, # CYRILLIC SMALL LETTER GHE + 0x00d4: 0x0434, # CYRILLIC SMALL LETTER DE + 0x00d5: 0x0435, # CYRILLIC SMALL LETTER IE + 0x00d6: 0x0436, # CYRILLIC SMALL LETTER ZHE + 0x00d7: 0x0437, # CYRILLIC SMALL LETTER ZE + 0x00d8: 0x0438, # CYRILLIC SMALL LETTER I + 0x00d9: 0x0439, # CYRILLIC SMALL LETTER SHORT I + 0x00da: 0x043a, # CYRILLIC SMALL LETTER KA + 0x00db: 0x043b, # CYRILLIC SMALL LETTER EL + 0x00dc: 0x043c, # CYRILLIC SMALL LETTER EM + 0x00dd: 0x043d, # CYRILLIC SMALL LETTER EN + 0x00de: 0x043e, # CYRILLIC SMALL LETTER O + 0x00df: 0x043f, # CYRILLIC SMALL LETTER PE + 0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER + 0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES + 0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE + 0x00e3: 0x0443, # CYRILLIC SMALL LETTER U + 0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF + 0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA + 0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE + 0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE + 0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA + 0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA + 0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN + 0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU + 0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN + 0x00ed: 0x044d, # CYRILLIC SMALL LETTER E + 0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU + 0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA + 0x00f0: 0x2116, # NUMERO SIGN + 0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO + 0x00f2: 0x0452, # CYRILLIC SMALL LETTER DJE + 0x00f3: 0x0453, # CYRILLIC SMALL LETTER GJE + 0x00f4: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x00f5: 0x0455, # CYRILLIC SMALL LETTER DZE + 0x00f6: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x00f7: 0x0457, # CYRILLIC SMALL LETTER YI + 0x00f8: 0x0458, # CYRILLIC SMALL LETTER JE + 0x00f9: 0x0459, # CYRILLIC SMALL LETTER LJE + 0x00fa: 0x045a, # CYRILLIC SMALL LETTER NJE + 0x00fb: 0x045b, # CYRILLIC SMALL LETTER TSHE + 0x00fc: 0x045c, # CYRILLIC SMALL LETTER KJE + 0x00fd: 0x00a7, # SECTION SIGN + 0x00fe: 0x045e, # CYRILLIC SMALL LETTER SHORT U + 0x00ff: 0x045f, # CYRILLIC SMALL LETTER DZHE }) ### Encoding Map diff --git a/Lib/encodings/iso8859_6.py b/Lib/encodings/iso8859_6.py index 298c2d5..3f5ab56 100644 --- a/Lib/encodings/iso8859_6.py +++ b/Lib/encodings/iso8859_6.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,99 +37,99 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: None, - 0x00a2: None, - 0x00a3: None, - 0x00a5: None, - 0x00a6: None, - 0x00a7: None, - 0x00a8: None, - 0x00a9: None, - 0x00aa: None, - 0x00ab: None, - 0x00ac: 0x060c, # ARABIC COMMA - 0x00ae: None, - 0x00af: None, - 0x00b0: None, - 0x00b1: None, - 0x00b2: None, - 0x00b3: None, - 0x00b4: None, - 0x00b5: None, - 0x00b6: None, - 0x00b7: None, - 0x00b8: None, - 0x00b9: None, - 0x00ba: None, - 0x00bb: 0x061b, # ARABIC SEMICOLON - 0x00bc: None, - 0x00bd: None, - 0x00be: None, - 0x00bf: 0x061f, # ARABIC QUESTION MARK - 0x00c0: None, - 0x00c1: 0x0621, # ARABIC LETTER HAMZA - 0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE - 0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE - 0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE - 0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW - 0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE - 0x00c7: 0x0627, # ARABIC LETTER ALEF - 0x00c8: 0x0628, # ARABIC LETTER BEH - 0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA - 0x00ca: 0x062a, # ARABIC LETTER TEH - 0x00cb: 0x062b, # ARABIC LETTER THEH - 0x00cc: 0x062c, # ARABIC LETTER JEEM - 0x00cd: 0x062d, # ARABIC LETTER HAH - 0x00ce: 0x062e, # ARABIC LETTER KHAH - 0x00cf: 0x062f, # ARABIC LETTER DAL - 0x00d0: 0x0630, # ARABIC LETTER THAL - 0x00d1: 0x0631, # ARABIC LETTER REH - 0x00d2: 0x0632, # ARABIC LETTER ZAIN - 0x00d3: 0x0633, # ARABIC LETTER SEEN - 0x00d4: 0x0634, # ARABIC LETTER SHEEN - 0x00d5: 0x0635, # ARABIC LETTER SAD - 0x00d6: 0x0636, # ARABIC LETTER DAD - 0x00d7: 0x0637, # ARABIC LETTER TAH - 0x00d8: 0x0638, # ARABIC LETTER ZAH - 0x00d9: 0x0639, # ARABIC LETTER AIN - 0x00da: 0x063a, # ARABIC LETTER GHAIN - 0x00db: None, - 0x00dc: None, - 0x00dd: None, - 0x00de: None, - 0x00df: None, - 0x00e0: 0x0640, # ARABIC TATWEEL - 0x00e1: 0x0641, # ARABIC LETTER FEH - 0x00e2: 0x0642, # ARABIC LETTER QAF - 0x00e3: 0x0643, # ARABIC LETTER KAF - 0x00e4: 0x0644, # ARABIC LETTER LAM - 0x00e5: 0x0645, # ARABIC LETTER MEEM - 0x00e6: 0x0646, # ARABIC LETTER NOON - 0x00e7: 0x0647, # ARABIC LETTER HEH - 0x00e8: 0x0648, # ARABIC LETTER WAW - 0x00e9: 0x0649, # ARABIC LETTER ALEF MAKSURA - 0x00ea: 0x064a, # ARABIC LETTER YEH - 0x00eb: 0x064b, # ARABIC FATHATAN - 0x00ec: 0x064c, # ARABIC DAMMATAN - 0x00ed: 0x064d, # ARABIC KASRATAN - 0x00ee: 0x064e, # ARABIC FATHA - 0x00ef: 0x064f, # ARABIC DAMMA - 0x00f0: 0x0650, # ARABIC KASRA - 0x00f1: 0x0651, # ARABIC SHADDA - 0x00f2: 0x0652, # ARABIC SUKUN - 0x00f3: None, - 0x00f4: None, - 0x00f5: None, - 0x00f6: None, - 0x00f7: None, - 0x00f8: None, - 0x00f9: None, - 0x00fa: None, - 0x00fb: None, - 0x00fc: None, - 0x00fd: None, - 0x00fe: None, - 0x00ff: None, + 0x00a1: None, + 0x00a2: None, + 0x00a3: None, + 0x00a5: None, + 0x00a6: None, + 0x00a7: None, + 0x00a8: None, + 0x00a9: None, + 0x00aa: None, + 0x00ab: None, + 0x00ac: 0x060c, # ARABIC COMMA + 0x00ae: None, + 0x00af: None, + 0x00b0: None, + 0x00b1: None, + 0x00b2: None, + 0x00b3: None, + 0x00b4: None, + 0x00b5: None, + 0x00b6: None, + 0x00b7: None, + 0x00b8: None, + 0x00b9: None, + 0x00ba: None, + 0x00bb: 0x061b, # ARABIC SEMICOLON + 0x00bc: None, + 0x00bd: None, + 0x00be: None, + 0x00bf: 0x061f, # ARABIC QUESTION MARK + 0x00c0: None, + 0x00c1: 0x0621, # ARABIC LETTER HAMZA + 0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE + 0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE + 0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE + 0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW + 0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE + 0x00c7: 0x0627, # ARABIC LETTER ALEF + 0x00c8: 0x0628, # ARABIC LETTER BEH + 0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA + 0x00ca: 0x062a, # ARABIC LETTER TEH + 0x00cb: 0x062b, # ARABIC LETTER THEH + 0x00cc: 0x062c, # ARABIC LETTER JEEM + 0x00cd: 0x062d, # ARABIC LETTER HAH + 0x00ce: 0x062e, # ARABIC LETTER KHAH + 0x00cf: 0x062f, # ARABIC LETTER DAL + 0x00d0: 0x0630, # ARABIC LETTER THAL + 0x00d1: 0x0631, # ARABIC LETTER REH + 0x00d2: 0x0632, # ARABIC LETTER ZAIN + 0x00d3: 0x0633, # ARABIC LETTER SEEN + 0x00d4: 0x0634, # ARABIC LETTER SHEEN + 0x00d5: 0x0635, # ARABIC LETTER SAD + 0x00d6: 0x0636, # ARABIC LETTER DAD + 0x00d7: 0x0637, # ARABIC LETTER TAH + 0x00d8: 0x0638, # ARABIC LETTER ZAH + 0x00d9: 0x0639, # ARABIC LETTER AIN + 0x00da: 0x063a, # ARABIC LETTER GHAIN + 0x00db: None, + 0x00dc: None, + 0x00dd: None, + 0x00de: None, + 0x00df: None, + 0x00e0: 0x0640, # ARABIC TATWEEL + 0x00e1: 0x0641, # ARABIC LETTER FEH + 0x00e2: 0x0642, # ARABIC LETTER QAF + 0x00e3: 0x0643, # ARABIC LETTER KAF + 0x00e4: 0x0644, # ARABIC LETTER LAM + 0x00e5: 0x0645, # ARABIC LETTER MEEM + 0x00e6: 0x0646, # ARABIC LETTER NOON + 0x00e7: 0x0647, # ARABIC LETTER HEH + 0x00e8: 0x0648, # ARABIC LETTER WAW + 0x00e9: 0x0649, # ARABIC LETTER ALEF MAKSURA + 0x00ea: 0x064a, # ARABIC LETTER YEH + 0x00eb: 0x064b, # ARABIC FATHATAN + 0x00ec: 0x064c, # ARABIC DAMMATAN + 0x00ed: 0x064d, # ARABIC KASRATAN + 0x00ee: 0x064e, # ARABIC FATHA + 0x00ef: 0x064f, # ARABIC DAMMA + 0x00f0: 0x0650, # ARABIC KASRA + 0x00f1: 0x0651, # ARABIC SHADDA + 0x00f2: 0x0652, # ARABIC SUKUN + 0x00f3: None, + 0x00f4: None, + 0x00f5: None, + 0x00f6: None, + 0x00f7: None, + 0x00f8: None, + 0x00f9: None, + 0x00fa: None, + 0x00fb: None, + 0x00fc: None, + 0x00fd: None, + 0x00fe: None, + 0x00ff: None, }) ### Encoding Map diff --git a/Lib/encodings/iso8859_7.py b/Lib/encodings/iso8859_7.py index eea3abf..2530c68 100644 --- a/Lib/encodings/iso8859_7.py +++ b/Lib/encodings/iso8859_7.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,86 +37,86 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x00a2: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00a4: None, - 0x00a5: None, - 0x00aa: None, - 0x00ae: None, - 0x00af: 0x2015, # HORIZONTAL BAR - 0x00b4: 0x0384, # GREEK TONOS - 0x00b5: 0x0385, # GREEK DIALYTIKA TONOS - 0x00b6: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x00b8: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x00b9: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x00ba: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x00bc: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x00be: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x00bf: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x00c0: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x00c1: 0x0391, # GREEK CAPITAL LETTER ALPHA - 0x00c2: 0x0392, # GREEK CAPITAL LETTER BETA - 0x00c3: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00c4: 0x0394, # GREEK CAPITAL LETTER DELTA - 0x00c5: 0x0395, # GREEK CAPITAL LETTER EPSILON - 0x00c6: 0x0396, # GREEK CAPITAL LETTER ZETA - 0x00c7: 0x0397, # GREEK CAPITAL LETTER ETA - 0x00c8: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00c9: 0x0399, # GREEK CAPITAL LETTER IOTA - 0x00ca: 0x039a, # GREEK CAPITAL LETTER KAPPA - 0x00cb: 0x039b, # GREEK CAPITAL LETTER LAMDA - 0x00cc: 0x039c, # GREEK CAPITAL LETTER MU - 0x00cd: 0x039d, # GREEK CAPITAL LETTER NU - 0x00ce: 0x039e, # GREEK CAPITAL LETTER XI - 0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON - 0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI - 0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO - 0x00d2: None, - 0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU - 0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON - 0x00d6: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00d7: 0x03a7, # GREEK CAPITAL LETTER CHI - 0x00d8: 0x03a8, # GREEK CAPITAL LETTER PSI - 0x00d9: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00da: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x00db: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x00dc: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x00dd: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x00de: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS - 0x00df: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS - 0x00e0: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e2: 0x03b2, # GREEK SMALL LETTER BETA - 0x00e3: 0x03b3, # GREEK SMALL LETTER GAMMA - 0x00e4: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00e6: 0x03b6, # GREEK SMALL LETTER ZETA - 0x00e7: 0x03b7, # GREEK SMALL LETTER ETA - 0x00e8: 0x03b8, # GREEK SMALL LETTER THETA - 0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA - 0x00ea: 0x03ba, # GREEK SMALL LETTER KAPPA - 0x00eb: 0x03bb, # GREEK SMALL LETTER LAMDA - 0x00ec: 0x03bc, # GREEK SMALL LETTER MU - 0x00ed: 0x03bd, # GREEK SMALL LETTER NU - 0x00ee: 0x03be, # GREEK SMALL LETTER XI - 0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON - 0x00f0: 0x03c0, # GREEK SMALL LETTER PI - 0x00f1: 0x03c1, # GREEK SMALL LETTER RHO - 0x00f2: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA - 0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00f4: 0x03c4, # GREEK SMALL LETTER TAU - 0x00f5: 0x03c5, # GREEK SMALL LETTER UPSILON - 0x00f6: 0x03c6, # GREEK SMALL LETTER PHI - 0x00f7: 0x03c7, # GREEK SMALL LETTER CHI - 0x00f8: 0x03c8, # GREEK SMALL LETTER PSI - 0x00f9: 0x03c9, # GREEK SMALL LETTER OMEGA - 0x00fa: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x00ff: None, + 0x00a1: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x00a2: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00a4: None, + 0x00a5: None, + 0x00aa: None, + 0x00ae: None, + 0x00af: 0x2015, # HORIZONTAL BAR + 0x00b4: 0x0384, # GREEK TONOS + 0x00b5: 0x0385, # GREEK DIALYTIKA TONOS + 0x00b6: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x00b8: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x00b9: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x00ba: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x00bc: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x00be: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x00bf: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x00c0: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x00c1: 0x0391, # GREEK CAPITAL LETTER ALPHA + 0x00c2: 0x0392, # GREEK CAPITAL LETTER BETA + 0x00c3: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00c4: 0x0394, # GREEK CAPITAL LETTER DELTA + 0x00c5: 0x0395, # GREEK CAPITAL LETTER EPSILON + 0x00c6: 0x0396, # GREEK CAPITAL LETTER ZETA + 0x00c7: 0x0397, # GREEK CAPITAL LETTER ETA + 0x00c8: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00c9: 0x0399, # GREEK CAPITAL LETTER IOTA + 0x00ca: 0x039a, # GREEK CAPITAL LETTER KAPPA + 0x00cb: 0x039b, # GREEK CAPITAL LETTER LAMDA + 0x00cc: 0x039c, # GREEK CAPITAL LETTER MU + 0x00cd: 0x039d, # GREEK CAPITAL LETTER NU + 0x00ce: 0x039e, # GREEK CAPITAL LETTER XI + 0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON + 0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI + 0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO + 0x00d2: None, + 0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU + 0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON + 0x00d6: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00d7: 0x03a7, # GREEK CAPITAL LETTER CHI + 0x00d8: 0x03a8, # GREEK CAPITAL LETTER PSI + 0x00d9: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00da: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x00db: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x00dc: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x00dd: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x00de: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS + 0x00df: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS + 0x00e0: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e2: 0x03b2, # GREEK SMALL LETTER BETA + 0x00e3: 0x03b3, # GREEK SMALL LETTER GAMMA + 0x00e4: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00e6: 0x03b6, # GREEK SMALL LETTER ZETA + 0x00e7: 0x03b7, # GREEK SMALL LETTER ETA + 0x00e8: 0x03b8, # GREEK SMALL LETTER THETA + 0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA + 0x00ea: 0x03ba, # GREEK SMALL LETTER KAPPA + 0x00eb: 0x03bb, # GREEK SMALL LETTER LAMDA + 0x00ec: 0x03bc, # GREEK SMALL LETTER MU + 0x00ed: 0x03bd, # GREEK SMALL LETTER NU + 0x00ee: 0x03be, # GREEK SMALL LETTER XI + 0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON + 0x00f0: 0x03c0, # GREEK SMALL LETTER PI + 0x00f1: 0x03c1, # GREEK SMALL LETTER RHO + 0x00f2: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA + 0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00f4: 0x03c4, # GREEK SMALL LETTER TAU + 0x00f5: 0x03c5, # GREEK SMALL LETTER UPSILON + 0x00f6: 0x03c6, # GREEK SMALL LETTER PHI + 0x00f7: 0x03c7, # GREEK SMALL LETTER CHI + 0x00f8: 0x03c8, # GREEK SMALL LETTER PSI + 0x00f9: 0x03c9, # GREEK SMALL LETTER OMEGA + 0x00fa: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x00ff: None, }) ### Encoding Map diff --git a/Lib/encodings/iso8859_8.py b/Lib/encodings/iso8859_8.py index 45b2ada..d0176ee 100644 --- a/Lib/encodings/iso8859_8.py +++ b/Lib/encodings/iso8859_8.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,74 +37,74 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: None, - 0x00aa: 0x00d7, # MULTIPLICATION SIGN - 0x00ba: 0x00f7, # DIVISION SIGN - 0x00bf: None, - 0x00c0: None, - 0x00c1: None, - 0x00c2: None, - 0x00c3: None, - 0x00c4: None, - 0x00c5: None, - 0x00c6: None, - 0x00c7: None, - 0x00c8: None, - 0x00c9: None, - 0x00ca: None, - 0x00cb: None, - 0x00cc: None, - 0x00cd: None, - 0x00ce: None, - 0x00cf: None, - 0x00d0: None, - 0x00d1: None, - 0x00d2: None, - 0x00d3: None, - 0x00d4: None, - 0x00d5: None, - 0x00d6: None, - 0x00d7: None, - 0x00d8: None, - 0x00d9: None, - 0x00da: None, - 0x00db: None, - 0x00dc: None, - 0x00dd: None, - 0x00de: None, - 0x00df: 0x2017, # DOUBLE LOW LINE - 0x00e0: 0x05d0, # HEBREW LETTER ALEF - 0x00e1: 0x05d1, # HEBREW LETTER BET - 0x00e2: 0x05d2, # HEBREW LETTER GIMEL - 0x00e3: 0x05d3, # HEBREW LETTER DALET - 0x00e4: 0x05d4, # HEBREW LETTER HE - 0x00e5: 0x05d5, # HEBREW LETTER VAV - 0x00e6: 0x05d6, # HEBREW LETTER ZAYIN - 0x00e7: 0x05d7, # HEBREW LETTER HET - 0x00e8: 0x05d8, # HEBREW LETTER TET - 0x00e9: 0x05d9, # HEBREW LETTER YOD - 0x00ea: 0x05da, # HEBREW LETTER FINAL KAF - 0x00eb: 0x05db, # HEBREW LETTER KAF - 0x00ec: 0x05dc, # HEBREW LETTER LAMED - 0x00ed: 0x05dd, # HEBREW LETTER FINAL MEM - 0x00ee: 0x05de, # HEBREW LETTER MEM - 0x00ef: 0x05df, # HEBREW LETTER FINAL NUN - 0x00f0: 0x05e0, # HEBREW LETTER NUN - 0x00f1: 0x05e1, # HEBREW LETTER SAMEKH - 0x00f2: 0x05e2, # HEBREW LETTER AYIN - 0x00f3: 0x05e3, # HEBREW LETTER FINAL PE - 0x00f4: 0x05e4, # HEBREW LETTER PE - 0x00f5: 0x05e5, # HEBREW LETTER FINAL TSADI - 0x00f6: 0x05e6, # HEBREW LETTER TSADI - 0x00f7: 0x05e7, # HEBREW LETTER QOF - 0x00f8: 0x05e8, # HEBREW LETTER RESH - 0x00f9: 0x05e9, # HEBREW LETTER SHIN - 0x00fa: 0x05ea, # HEBREW LETTER TAV - 0x00fb: None, - 0x00fc: None, - 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK - 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK - 0x00ff: None, + 0x00a1: None, + 0x00aa: 0x00d7, # MULTIPLICATION SIGN + 0x00ba: 0x00f7, # DIVISION SIGN + 0x00bf: None, + 0x00c0: None, + 0x00c1: None, + 0x00c2: None, + 0x00c3: None, + 0x00c4: None, + 0x00c5: None, + 0x00c6: None, + 0x00c7: None, + 0x00c8: None, + 0x00c9: None, + 0x00ca: None, + 0x00cb: None, + 0x00cc: None, + 0x00cd: None, + 0x00ce: None, + 0x00cf: None, + 0x00d0: None, + 0x00d1: None, + 0x00d2: None, + 0x00d3: None, + 0x00d4: None, + 0x00d5: None, + 0x00d6: None, + 0x00d7: None, + 0x00d8: None, + 0x00d9: None, + 0x00da: None, + 0x00db: None, + 0x00dc: None, + 0x00dd: None, + 0x00de: None, + 0x00df: 0x2017, # DOUBLE LOW LINE + 0x00e0: 0x05d0, # HEBREW LETTER ALEF + 0x00e1: 0x05d1, # HEBREW LETTER BET + 0x00e2: 0x05d2, # HEBREW LETTER GIMEL + 0x00e3: 0x05d3, # HEBREW LETTER DALET + 0x00e4: 0x05d4, # HEBREW LETTER HE + 0x00e5: 0x05d5, # HEBREW LETTER VAV + 0x00e6: 0x05d6, # HEBREW LETTER ZAYIN + 0x00e7: 0x05d7, # HEBREW LETTER HET + 0x00e8: 0x05d8, # HEBREW LETTER TET + 0x00e9: 0x05d9, # HEBREW LETTER YOD + 0x00ea: 0x05da, # HEBREW LETTER FINAL KAF + 0x00eb: 0x05db, # HEBREW LETTER KAF + 0x00ec: 0x05dc, # HEBREW LETTER LAMED + 0x00ed: 0x05dd, # HEBREW LETTER FINAL MEM + 0x00ee: 0x05de, # HEBREW LETTER MEM + 0x00ef: 0x05df, # HEBREW LETTER FINAL NUN + 0x00f0: 0x05e0, # HEBREW LETTER NUN + 0x00f1: 0x05e1, # HEBREW LETTER SAMEKH + 0x00f2: 0x05e2, # HEBREW LETTER AYIN + 0x00f3: 0x05e3, # HEBREW LETTER FINAL PE + 0x00f4: 0x05e4, # HEBREW LETTER PE + 0x00f5: 0x05e5, # HEBREW LETTER FINAL TSADI + 0x00f6: 0x05e6, # HEBREW LETTER TSADI + 0x00f7: 0x05e7, # HEBREW LETTER QOF + 0x00f8: 0x05e8, # HEBREW LETTER RESH + 0x00f9: 0x05e9, # HEBREW LETTER SHIN + 0x00fa: 0x05ea, # HEBREW LETTER TAV + 0x00fb: None, + 0x00fc: None, + 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK + 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK + 0x00ff: None, }) ### Encoding Map diff --git a/Lib/encodings/iso8859_9.py b/Lib/encodings/iso8859_9.py index ad0c190..28a603f 100644 --- a/Lib/encodings/iso8859_9.py +++ b/Lib/encodings/iso8859_9.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,12 +37,12 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE - 0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE - 0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE + 0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE + 0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA }) ### Encoding Map diff --git a/Lib/encodings/koi8_r.py b/Lib/encodings/koi8_r.py index 242d0c4..7494ca6 100644 --- a/Lib/encodings/koi8_r.py +++ b/Lib/encodings/koi8_r.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,134 +37,134 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x0081: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x0082: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x0083: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x0084: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x0085: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x0086: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x0087: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x0088: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x0089: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x008a: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x008b: 0x2580, # UPPER HALF BLOCK - 0x008c: 0x2584, # LOWER HALF BLOCK - 0x008d: 0x2588, # FULL BLOCK - 0x008e: 0x258c, # LEFT HALF BLOCK - 0x008f: 0x2590, # RIGHT HALF BLOCK - 0x0090: 0x2591, # LIGHT SHADE - 0x0091: 0x2592, # MEDIUM SHADE - 0x0092: 0x2593, # DARK SHADE - 0x0093: 0x2320, # TOP HALF INTEGRAL - 0x0094: 0x25a0, # BLACK SQUARE - 0x0095: 0x2219, # BULLET OPERATOR - 0x0096: 0x221a, # SQUARE ROOT - 0x0097: 0x2248, # ALMOST EQUAL TO - 0x0098: 0x2264, # LESS-THAN OR EQUAL TO - 0x0099: 0x2265, # GREATER-THAN OR EQUAL TO - 0x009a: 0x00a0, # NO-BREAK SPACE - 0x009b: 0x2321, # BOTTOM HALF INTEGRAL - 0x009c: 0x00b0, # DEGREE SIGN - 0x009d: 0x00b2, # SUPERSCRIPT TWO - 0x009e: 0x00b7, # MIDDLE DOT - 0x009f: 0x00f7, # DIVISION SIGN - 0x00a0: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00a1: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00a2: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00a3: 0x0451, # CYRILLIC SMALL LETTER IO - 0x00a4: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00a5: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00a6: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00a7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00a8: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00a9: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00aa: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00ab: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00ac: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00ad: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00ae: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00af: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00b0: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00b1: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00b2: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b3: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x00b4: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b5: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00b6: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00b7: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00b8: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00b9: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00ba: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00bb: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00bc: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00bd: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00be: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00bf: 0x00a9, # COPYRIGHT SIGN - 0x00c0: 0x044e, # CYRILLIC SMALL LETTER YU - 0x00c1: 0x0430, # CYRILLIC SMALL LETTER A - 0x00c2: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00c3: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00c4: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00c5: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00c6: 0x0444, # CYRILLIC SMALL LETTER EF - 0x00c7: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00c8: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00c9: 0x0438, # CYRILLIC SMALL LETTER I - 0x00ca: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00cb: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00cc: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00cd: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00ce: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00cf: 0x043e, # CYRILLIC SMALL LETTER O - 0x00d0: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00d1: 0x044f, # CYRILLIC SMALL LETTER YA - 0x00d2: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00d3: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00d4: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00d5: 0x0443, # CYRILLIC SMALL LETTER U - 0x00d6: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00d7: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00d8: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00d9: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00da: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00db: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00dc: 0x044d, # CYRILLIC SMALL LETTER E - 0x00dd: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00de: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00df: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x00e0: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x00e1: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x00e2: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x00e3: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x00e4: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x00e5: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x00e6: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x00e7: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x00e8: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x00e9: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x00ea: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x00eb: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x00ec: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x00ed: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x00ee: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x00ef: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x00f0: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x00f1: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00f2: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x00f3: 0x0421, # CYRILLIC CAPITAL LETTER ES - 0x00f4: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x00f5: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x00f6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x00f7: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x00f8: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x00f9: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x00fa: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x00fb: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x00fc: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x00fd: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x00fe: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x00ff: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x0080: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x0081: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x0082: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x0083: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x0084: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x0085: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x0086: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x0087: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x0088: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x0089: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x008a: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x008b: 0x2580, # UPPER HALF BLOCK + 0x008c: 0x2584, # LOWER HALF BLOCK + 0x008d: 0x2588, # FULL BLOCK + 0x008e: 0x258c, # LEFT HALF BLOCK + 0x008f: 0x2590, # RIGHT HALF BLOCK + 0x0090: 0x2591, # LIGHT SHADE + 0x0091: 0x2592, # MEDIUM SHADE + 0x0092: 0x2593, # DARK SHADE + 0x0093: 0x2320, # TOP HALF INTEGRAL + 0x0094: 0x25a0, # BLACK SQUARE + 0x0095: 0x2219, # BULLET OPERATOR + 0x0096: 0x221a, # SQUARE ROOT + 0x0097: 0x2248, # ALMOST EQUAL TO + 0x0098: 0x2264, # LESS-THAN OR EQUAL TO + 0x0099: 0x2265, # GREATER-THAN OR EQUAL TO + 0x009a: 0x00a0, # NO-BREAK SPACE + 0x009b: 0x2321, # BOTTOM HALF INTEGRAL + 0x009c: 0x00b0, # DEGREE SIGN + 0x009d: 0x00b2, # SUPERSCRIPT TWO + 0x009e: 0x00b7, # MIDDLE DOT + 0x009f: 0x00f7, # DIVISION SIGN + 0x00a0: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00a1: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00a2: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00a3: 0x0451, # CYRILLIC SMALL LETTER IO + 0x00a4: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00a5: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00a6: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00a7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00a8: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00a9: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00aa: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00ab: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00ac: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00ad: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00ae: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00af: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00b0: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00b1: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00b2: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b3: 0x0401, # CYRILLIC CAPITAL LETTER IO + 0x00b4: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b5: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00b6: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00b7: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00b8: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00b9: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00ba: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00bb: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00bc: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00bd: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00be: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00bf: 0x00a9, # COPYRIGHT SIGN + 0x00c0: 0x044e, # CYRILLIC SMALL LETTER YU + 0x00c1: 0x0430, # CYRILLIC SMALL LETTER A + 0x00c2: 0x0431, # CYRILLIC SMALL LETTER BE + 0x00c3: 0x0446, # CYRILLIC SMALL LETTER TSE + 0x00c4: 0x0434, # CYRILLIC SMALL LETTER DE + 0x00c5: 0x0435, # CYRILLIC SMALL LETTER IE + 0x00c6: 0x0444, # CYRILLIC SMALL LETTER EF + 0x00c7: 0x0433, # CYRILLIC SMALL LETTER GHE + 0x00c8: 0x0445, # CYRILLIC SMALL LETTER HA + 0x00c9: 0x0438, # CYRILLIC SMALL LETTER I + 0x00ca: 0x0439, # CYRILLIC SMALL LETTER SHORT I + 0x00cb: 0x043a, # CYRILLIC SMALL LETTER KA + 0x00cc: 0x043b, # CYRILLIC SMALL LETTER EL + 0x00cd: 0x043c, # CYRILLIC SMALL LETTER EM + 0x00ce: 0x043d, # CYRILLIC SMALL LETTER EN + 0x00cf: 0x043e, # CYRILLIC SMALL LETTER O + 0x00d0: 0x043f, # CYRILLIC SMALL LETTER PE + 0x00d1: 0x044f, # CYRILLIC SMALL LETTER YA + 0x00d2: 0x0440, # CYRILLIC SMALL LETTER ER + 0x00d3: 0x0441, # CYRILLIC SMALL LETTER ES + 0x00d4: 0x0442, # CYRILLIC SMALL LETTER TE + 0x00d5: 0x0443, # CYRILLIC SMALL LETTER U + 0x00d6: 0x0436, # CYRILLIC SMALL LETTER ZHE + 0x00d7: 0x0432, # CYRILLIC SMALL LETTER VE + 0x00d8: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN + 0x00d9: 0x044b, # CYRILLIC SMALL LETTER YERU + 0x00da: 0x0437, # CYRILLIC SMALL LETTER ZE + 0x00db: 0x0448, # CYRILLIC SMALL LETTER SHA + 0x00dc: 0x044d, # CYRILLIC SMALL LETTER E + 0x00dd: 0x0449, # CYRILLIC SMALL LETTER SHCHA + 0x00de: 0x0447, # CYRILLIC SMALL LETTER CHE + 0x00df: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN + 0x00e0: 0x042e, # CYRILLIC CAPITAL LETTER YU + 0x00e1: 0x0410, # CYRILLIC CAPITAL LETTER A + 0x00e2: 0x0411, # CYRILLIC CAPITAL LETTER BE + 0x00e3: 0x0426, # CYRILLIC CAPITAL LETTER TSE + 0x00e4: 0x0414, # CYRILLIC CAPITAL LETTER DE + 0x00e5: 0x0415, # CYRILLIC CAPITAL LETTER IE + 0x00e6: 0x0424, # CYRILLIC CAPITAL LETTER EF + 0x00e7: 0x0413, # CYRILLIC CAPITAL LETTER GHE + 0x00e8: 0x0425, # CYRILLIC CAPITAL LETTER HA + 0x00e9: 0x0418, # CYRILLIC CAPITAL LETTER I + 0x00ea: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I + 0x00eb: 0x041a, # CYRILLIC CAPITAL LETTER KA + 0x00ec: 0x041b, # CYRILLIC CAPITAL LETTER EL + 0x00ed: 0x041c, # CYRILLIC CAPITAL LETTER EM + 0x00ee: 0x041d, # CYRILLIC CAPITAL LETTER EN + 0x00ef: 0x041e, # CYRILLIC CAPITAL LETTER O + 0x00f0: 0x041f, # CYRILLIC CAPITAL LETTER PE + 0x00f1: 0x042f, # CYRILLIC CAPITAL LETTER YA + 0x00f2: 0x0420, # CYRILLIC CAPITAL LETTER ER + 0x00f3: 0x0421, # CYRILLIC CAPITAL LETTER ES + 0x00f4: 0x0422, # CYRILLIC CAPITAL LETTER TE + 0x00f5: 0x0423, # CYRILLIC CAPITAL LETTER U + 0x00f6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE + 0x00f7: 0x0412, # CYRILLIC CAPITAL LETTER VE + 0x00f8: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x00f9: 0x042b, # CYRILLIC CAPITAL LETTER YERU + 0x00fa: 0x0417, # CYRILLIC CAPITAL LETTER ZE + 0x00fb: 0x0428, # CYRILLIC CAPITAL LETTER SHA + 0x00fc: 0x042d, # CYRILLIC CAPITAL LETTER E + 0x00fd: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA + 0x00fe: 0x0427, # CYRILLIC CAPITAL LETTER CHE + 0x00ff: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN }) ### Encoding Map diff --git a/Lib/encodings/latin_1.py b/Lib/encodings/latin_1.py index ebca9fb..0e55917 100644 --- a/Lib/encodings/latin_1.py +++ b/Lib/encodings/latin_1.py @@ -19,7 +19,7 @@ class Codec(codecs.Codec): class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass diff --git a/Lib/encodings/mac_cyrillic.py b/Lib/encodings/mac_cyrillic.py index 6ae4a30..922523b 100644 --- a/Lib/encodings/mac_cyrillic.py +++ b/Lib/encodings/mac_cyrillic.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,129 +37,129 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES - 0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00a0: 0x2020, # DAGGER - 0x00a1: 0x00b0, # DEGREE SIGN - 0x00a4: 0x00a7, # SECTION SIGN - 0x00a5: 0x2022, # BULLET - 0x00a6: 0x00b6, # PILCROW SIGN - 0x00a7: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x00a8: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x2122, # TRADE MARK SIGN - 0x00ab: 0x0402, # CYRILLIC CAPITAL LETTER DJE - 0x00ac: 0x0452, # CYRILLIC SMALL LETTER DJE - 0x00ad: 0x2260, # NOT EQUAL TO - 0x00ae: 0x0403, # CYRILLIC CAPITAL LETTER GJE - 0x00af: 0x0453, # CYRILLIC SMALL LETTER GJE - 0x00b0: 0x221e, # INFINITY - 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO - 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00b4: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL - 0x00b7: 0x0408, # CYRILLIC CAPITAL LETTER JE - 0x00b8: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x00b9: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x00ba: 0x0407, # CYRILLIC CAPITAL LETTER YI - 0x00bb: 0x0457, # CYRILLIC SMALL LETTER YI - 0x00bc: 0x0409, # CYRILLIC CAPITAL LETTER LJE - 0x00bd: 0x0459, # CYRILLIC SMALL LETTER LJE - 0x00be: 0x040a, # CYRILLIC CAPITAL LETTER NJE - 0x00bf: 0x045a, # CYRILLIC SMALL LETTER NJE - 0x00c0: 0x0458, # CYRILLIC SMALL LETTER JE - 0x00c1: 0x0405, # CYRILLIC CAPITAL LETTER DZE - 0x00c2: 0x00ac, # NOT SIGN - 0x00c3: 0x221a, # SQUARE ROOT - 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00c5: 0x2248, # ALMOST EQUAL TO - 0x00c6: 0x2206, # INCREMENT - 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS - 0x00ca: 0x00a0, # NO-BREAK SPACE - 0x00cb: 0x040b, # CYRILLIC CAPITAL LETTER TSHE - 0x00cc: 0x045b, # CYRILLIC SMALL LETTER TSHE - 0x00cd: 0x040c, # CYRILLIC CAPITAL LETTER KJE - 0x00ce: 0x045c, # CYRILLIC SMALL LETTER KJE - 0x00cf: 0x0455, # CYRILLIC SMALL LETTER DZE - 0x00d0: 0x2013, # EN DASH - 0x00d1: 0x2014, # EM DASH - 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00d6: 0x00f7, # DIVISION SIGN - 0x00d7: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00d8: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U - 0x00d9: 0x045e, # CYRILLIC SMALL LETTER SHORT U - 0x00da: 0x040f, # CYRILLIC CAPITAL LETTER DZHE - 0x00db: 0x045f, # CYRILLIC SMALL LETTER DZHE - 0x00dc: 0x2116, # NUMERO SIGN - 0x00dd: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x00de: 0x0451, # CYRILLIC SMALL LETTER IO - 0x00df: 0x044f, # CYRILLIC SMALL LETTER YA - 0x00e0: 0x0430, # CYRILLIC SMALL LETTER A - 0x00e1: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00e2: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00e3: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00e4: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00e5: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00e6: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00e7: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00e8: 0x0438, # CYRILLIC SMALL LETTER I - 0x00e9: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00ea: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00eb: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00ec: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00ed: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00ee: 0x043e, # CYRILLIC SMALL LETTER O - 0x00ef: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00f0: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00f1: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00f2: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00f3: 0x0443, # CYRILLIC SMALL LETTER U - 0x00f4: 0x0444, # CYRILLIC SMALL LETTER EF - 0x00f5: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00f6: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00f7: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00f8: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00fa: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x00fb: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00fc: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00fd: 0x044d, # CYRILLIC SMALL LETTER E - 0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU - 0x00ff: 0x00a4, # CURRENCY SIGN + 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A + 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE + 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE + 0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE + 0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE + 0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE + 0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE + 0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE + 0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I + 0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I + 0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA + 0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL + 0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM + 0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN + 0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O + 0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE + 0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER + 0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES + 0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE + 0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U + 0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF + 0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA + 0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE + 0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE + 0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA + 0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA + 0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU + 0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E + 0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU + 0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA + 0x00a0: 0x2020, # DAGGER + 0x00a1: 0x00b0, # DEGREE SIGN + 0x00a4: 0x00a7, # SECTION SIGN + 0x00a5: 0x2022, # BULLET + 0x00a6: 0x00b6, # PILCROW SIGN + 0x00a7: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x00a8: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x2122, # TRADE MARK SIGN + 0x00ab: 0x0402, # CYRILLIC CAPITAL LETTER DJE + 0x00ac: 0x0452, # CYRILLIC SMALL LETTER DJE + 0x00ad: 0x2260, # NOT EQUAL TO + 0x00ae: 0x0403, # CYRILLIC CAPITAL LETTER GJE + 0x00af: 0x0453, # CYRILLIC SMALL LETTER GJE + 0x00b0: 0x221e, # INFINITY + 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO + 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00b4: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL + 0x00b7: 0x0408, # CYRILLIC CAPITAL LETTER JE + 0x00b8: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x00b9: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x00ba: 0x0407, # CYRILLIC CAPITAL LETTER YI + 0x00bb: 0x0457, # CYRILLIC SMALL LETTER YI + 0x00bc: 0x0409, # CYRILLIC CAPITAL LETTER LJE + 0x00bd: 0x0459, # CYRILLIC SMALL LETTER LJE + 0x00be: 0x040a, # CYRILLIC CAPITAL LETTER NJE + 0x00bf: 0x045a, # CYRILLIC SMALL LETTER NJE + 0x00c0: 0x0458, # CYRILLIC SMALL LETTER JE + 0x00c1: 0x0405, # CYRILLIC CAPITAL LETTER DZE + 0x00c2: 0x00ac, # NOT SIGN + 0x00c3: 0x221a, # SQUARE ROOT + 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00c5: 0x2248, # ALMOST EQUAL TO + 0x00c6: 0x2206, # INCREMENT + 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS + 0x00ca: 0x00a0, # NO-BREAK SPACE + 0x00cb: 0x040b, # CYRILLIC CAPITAL LETTER TSHE + 0x00cc: 0x045b, # CYRILLIC SMALL LETTER TSHE + 0x00cd: 0x040c, # CYRILLIC CAPITAL LETTER KJE + 0x00ce: 0x045c, # CYRILLIC SMALL LETTER KJE + 0x00cf: 0x0455, # CYRILLIC SMALL LETTER DZE + 0x00d0: 0x2013, # EN DASH + 0x00d1: 0x2014, # EM DASH + 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00d6: 0x00f7, # DIVISION SIGN + 0x00d7: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x00d8: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U + 0x00d9: 0x045e, # CYRILLIC SMALL LETTER SHORT U + 0x00da: 0x040f, # CYRILLIC CAPITAL LETTER DZHE + 0x00db: 0x045f, # CYRILLIC SMALL LETTER DZHE + 0x00dc: 0x2116, # NUMERO SIGN + 0x00dd: 0x0401, # CYRILLIC CAPITAL LETTER IO + 0x00de: 0x0451, # CYRILLIC SMALL LETTER IO + 0x00df: 0x044f, # CYRILLIC SMALL LETTER YA + 0x00e0: 0x0430, # CYRILLIC SMALL LETTER A + 0x00e1: 0x0431, # CYRILLIC SMALL LETTER BE + 0x00e2: 0x0432, # CYRILLIC SMALL LETTER VE + 0x00e3: 0x0433, # CYRILLIC SMALL LETTER GHE + 0x00e4: 0x0434, # CYRILLIC SMALL LETTER DE + 0x00e5: 0x0435, # CYRILLIC SMALL LETTER IE + 0x00e6: 0x0436, # CYRILLIC SMALL LETTER ZHE + 0x00e7: 0x0437, # CYRILLIC SMALL LETTER ZE + 0x00e8: 0x0438, # CYRILLIC SMALL LETTER I + 0x00e9: 0x0439, # CYRILLIC SMALL LETTER SHORT I + 0x00ea: 0x043a, # CYRILLIC SMALL LETTER KA + 0x00eb: 0x043b, # CYRILLIC SMALL LETTER EL + 0x00ec: 0x043c, # CYRILLIC SMALL LETTER EM + 0x00ed: 0x043d, # CYRILLIC SMALL LETTER EN + 0x00ee: 0x043e, # CYRILLIC SMALL LETTER O + 0x00ef: 0x043f, # CYRILLIC SMALL LETTER PE + 0x00f0: 0x0440, # CYRILLIC SMALL LETTER ER + 0x00f1: 0x0441, # CYRILLIC SMALL LETTER ES + 0x00f2: 0x0442, # CYRILLIC SMALL LETTER TE + 0x00f3: 0x0443, # CYRILLIC SMALL LETTER U + 0x00f4: 0x0444, # CYRILLIC SMALL LETTER EF + 0x00f5: 0x0445, # CYRILLIC SMALL LETTER HA + 0x00f6: 0x0446, # CYRILLIC SMALL LETTER TSE + 0x00f7: 0x0447, # CYRILLIC SMALL LETTER CHE + 0x00f8: 0x0448, # CYRILLIC SMALL LETTER SHA + 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA + 0x00fa: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN + 0x00fb: 0x044b, # CYRILLIC SMALL LETTER YERU + 0x00fc: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN + 0x00fd: 0x044d, # CYRILLIC SMALL LETTER E + 0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU + 0x00ff: 0x00a4, # CURRENCY SIGN }) ### Encoding Map diff --git a/Lib/encodings/mac_greek.py b/Lib/encodings/mac_greek.py index 839cf61..473a157 100644 --- a/Lib/encodings/mac_greek.py +++ b/Lib/encodings/mac_greek.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,132 +37,132 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x0081: 0x00b9, # SUPERSCRIPT ONE - 0x0082: 0x00b2, # SUPERSCRIPT TWO - 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0084: 0x00b3, # SUPERSCRIPT THREE - 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x0087: 0x0385, # GREEK DIALYTIKA TONOS - 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x008b: 0x0384, # GREEK TONOS - 0x008c: 0x00a8, # DIAERESIS - 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x0092: 0x00a3, # POUND SIGN - 0x0093: 0x2122, # TRADE MARK SIGN - 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x0096: 0x2022, # BULLET - 0x0097: 0x00bd, # VULGAR FRACTION ONE HALF - 0x0098: 0x2030, # PER MILLE SIGN - 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x009b: 0x00a6, # BROKEN BAR - 0x009c: 0x00ad, # SOFT HYPHEN - 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00a0: 0x2020, # DAGGER - 0x00a1: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00a2: 0x0394, # GREEK CAPITAL LETTER DELTA - 0x00a3: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00a4: 0x039b, # GREEK CAPITAL LETTER LAMBDA - 0x00a5: 0x039e, # GREEK CAPITAL LETTER XI - 0x00a6: 0x03a0, # GREEK CAPITAL LETTER PI - 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00a8: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00ab: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x00ac: 0x00a7, # SECTION SIGN - 0x00ad: 0x2260, # NOT EQUAL TO - 0x00ae: 0x00b0, # DEGREE SIGN - 0x00af: 0x0387, # GREEK ANO TELEIA - 0x00b0: 0x0391, # GREEK CAPITAL LETTER ALPHA - 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO - 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00b4: 0x00a5, # YEN SIGN - 0x00b5: 0x0392, # GREEK CAPITAL LETTER BETA - 0x00b6: 0x0395, # GREEK CAPITAL LETTER EPSILON - 0x00b7: 0x0396, # GREEK CAPITAL LETTER ZETA - 0x00b8: 0x0397, # GREEK CAPITAL LETTER ETA - 0x00b9: 0x0399, # GREEK CAPITAL LETTER IOTA - 0x00ba: 0x039a, # GREEK CAPITAL LETTER KAPPA - 0x00bb: 0x039c, # GREEK CAPITAL LETTER MU - 0x00bc: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00bd: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x00be: 0x03a8, # GREEK CAPITAL LETTER PSI - 0x00bf: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00c0: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x00c1: 0x039d, # GREEK CAPITAL LETTER NU - 0x00c2: 0x00ac, # NOT SIGN - 0x00c3: 0x039f, # GREEK CAPITAL LETTER OMICRON - 0x00c4: 0x03a1, # GREEK CAPITAL LETTER RHO - 0x00c5: 0x2248, # ALMOST EQUAL TO - 0x00c6: 0x03a4, # GREEK CAPITAL LETTER TAU - 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS - 0x00ca: 0x00a0, # NO-BREAK SPACE - 0x00cb: 0x03a5, # GREEK CAPITAL LETTER UPSILON - 0x00cc: 0x03a7, # GREEK CAPITAL LETTER CHI - 0x00cd: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x00ce: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE - 0x00d0: 0x2013, # EN DASH - 0x00d1: 0x2015, # HORIZONTAL BAR - 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00d6: 0x00f7, # DIVISION SIGN - 0x00d7: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x00d8: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x00d9: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x00da: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x00db: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x00dc: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS - 0x00dd: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS - 0x00de: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x00df: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x00e0: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e2: 0x03b2, # GREEK SMALL LETTER BETA - 0x00e3: 0x03c8, # GREEK SMALL LETTER PSI - 0x00e4: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00e6: 0x03c6, # GREEK SMALL LETTER PHI - 0x00e7: 0x03b3, # GREEK SMALL LETTER GAMMA - 0x00e8: 0x03b7, # GREEK SMALL LETTER ETA - 0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA - 0x00ea: 0x03be, # GREEK SMALL LETTER XI - 0x00eb: 0x03ba, # GREEK SMALL LETTER KAPPA - 0x00ec: 0x03bb, # GREEK SMALL LETTER LAMBDA - 0x00ed: 0x03bc, # GREEK SMALL LETTER MU - 0x00ee: 0x03bd, # GREEK SMALL LETTER NU - 0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON - 0x00f0: 0x03c0, # GREEK SMALL LETTER PI - 0x00f1: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x00f2: 0x03c1, # GREEK SMALL LETTER RHO - 0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00f4: 0x03c4, # GREEK SMALL LETTER TAU - 0x00f5: 0x03b8, # GREEK SMALL LETTER THETA - 0x00f6: 0x03c9, # GREEK SMALL LETTER OMEGA - 0x00f7: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA - 0x00f8: 0x03c7, # GREEK SMALL LETTER CHI - 0x00f9: 0x03c5, # GREEK SMALL LETTER UPSILON - 0x00fa: 0x03b6, # GREEK SMALL LETTER ZETA - 0x00fb: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x00fc: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x00fd: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x00fe: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x00ff: None, # UNDEFINED + 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x0081: 0x00b9, # SUPERSCRIPT ONE + 0x0082: 0x00b2, # SUPERSCRIPT TWO + 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0084: 0x00b3, # SUPERSCRIPT THREE + 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x0087: 0x0385, # GREEK DIALYTIKA TONOS + 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x008b: 0x0384, # GREEK TONOS + 0x008c: 0x00a8, # DIAERESIS + 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x0092: 0x00a3, # POUND SIGN + 0x0093: 0x2122, # TRADE MARK SIGN + 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x0096: 0x2022, # BULLET + 0x0097: 0x00bd, # VULGAR FRACTION ONE HALF + 0x0098: 0x2030, # PER MILLE SIGN + 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x009b: 0x00a6, # BROKEN BAR + 0x009c: 0x00ad, # SOFT HYPHEN + 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00a0: 0x2020, # DAGGER + 0x00a1: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00a2: 0x0394, # GREEK CAPITAL LETTER DELTA + 0x00a3: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00a4: 0x039b, # GREEK CAPITAL LETTER LAMBDA + 0x00a5: 0x039e, # GREEK CAPITAL LETTER XI + 0x00a6: 0x03a0, # GREEK CAPITAL LETTER PI + 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00a8: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00ab: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x00ac: 0x00a7, # SECTION SIGN + 0x00ad: 0x2260, # NOT EQUAL TO + 0x00ae: 0x00b0, # DEGREE SIGN + 0x00af: 0x0387, # GREEK ANO TELEIA + 0x00b0: 0x0391, # GREEK CAPITAL LETTER ALPHA + 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO + 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00b4: 0x00a5, # YEN SIGN + 0x00b5: 0x0392, # GREEK CAPITAL LETTER BETA + 0x00b6: 0x0395, # GREEK CAPITAL LETTER EPSILON + 0x00b7: 0x0396, # GREEK CAPITAL LETTER ZETA + 0x00b8: 0x0397, # GREEK CAPITAL LETTER ETA + 0x00b9: 0x0399, # GREEK CAPITAL LETTER IOTA + 0x00ba: 0x039a, # GREEK CAPITAL LETTER KAPPA + 0x00bb: 0x039c, # GREEK CAPITAL LETTER MU + 0x00bc: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00bd: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x00be: 0x03a8, # GREEK CAPITAL LETTER PSI + 0x00bf: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00c0: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x00c1: 0x039d, # GREEK CAPITAL LETTER NU + 0x00c2: 0x00ac, # NOT SIGN + 0x00c3: 0x039f, # GREEK CAPITAL LETTER OMICRON + 0x00c4: 0x03a1, # GREEK CAPITAL LETTER RHO + 0x00c5: 0x2248, # ALMOST EQUAL TO + 0x00c6: 0x03a4, # GREEK CAPITAL LETTER TAU + 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS + 0x00ca: 0x00a0, # NO-BREAK SPACE + 0x00cb: 0x03a5, # GREEK CAPITAL LETTER UPSILON + 0x00cc: 0x03a7, # GREEK CAPITAL LETTER CHI + 0x00cd: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x00ce: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE + 0x00d0: 0x2013, # EN DASH + 0x00d1: 0x2015, # HORIZONTAL BAR + 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00d6: 0x00f7, # DIVISION SIGN + 0x00d7: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x00d8: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x00d9: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x00da: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x00db: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x00dc: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS + 0x00dd: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS + 0x00de: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x00df: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x00e0: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e2: 0x03b2, # GREEK SMALL LETTER BETA + 0x00e3: 0x03c8, # GREEK SMALL LETTER PSI + 0x00e4: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00e6: 0x03c6, # GREEK SMALL LETTER PHI + 0x00e7: 0x03b3, # GREEK SMALL LETTER GAMMA + 0x00e8: 0x03b7, # GREEK SMALL LETTER ETA + 0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA + 0x00ea: 0x03be, # GREEK SMALL LETTER XI + 0x00eb: 0x03ba, # GREEK SMALL LETTER KAPPA + 0x00ec: 0x03bb, # GREEK SMALL LETTER LAMBDA + 0x00ed: 0x03bc, # GREEK SMALL LETTER MU + 0x00ee: 0x03bd, # GREEK SMALL LETTER NU + 0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON + 0x00f0: 0x03c0, # GREEK SMALL LETTER PI + 0x00f1: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x00f2: 0x03c1, # GREEK SMALL LETTER RHO + 0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00f4: 0x03c4, # GREEK SMALL LETTER TAU + 0x00f5: 0x03b8, # GREEK SMALL LETTER THETA + 0x00f6: 0x03c9, # GREEK SMALL LETTER OMEGA + 0x00f7: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA + 0x00f8: 0x03c7, # GREEK SMALL LETTER CHI + 0x00f9: 0x03c5, # GREEK SMALL LETTER UPSILON + 0x00fa: 0x03b6, # GREEK SMALL LETTER ZETA + 0x00fb: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x00fc: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x00fd: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x00fe: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x00ff: None, # UNDEFINED }) ### Encoding Map diff --git a/Lib/encodings/mac_iceland.py b/Lib/encodings/mac_iceland.py index 284580c..00bddf9 100644 --- a/Lib/encodings/mac_iceland.py +++ b/Lib/encodings/mac_iceland.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,128 +37,128 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x0093: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00a0: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00a1: 0x00b0, # DEGREE SIGN - 0x00a4: 0x00a7, # SECTION SIGN - 0x00a5: 0x2022, # BULLET - 0x00a6: 0x00b6, # PILCROW SIGN - 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00a8: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x2122, # TRADE MARK SIGN - 0x00ab: 0x00b4, # ACUTE ACCENT - 0x00ac: 0x00a8, # DIAERESIS - 0x00ad: 0x2260, # NOT EQUAL TO - 0x00ae: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00b0: 0x221e, # INFINITY - 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO - 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00b4: 0x00a5, # YEN SIGN - 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL - 0x00b7: 0x2211, # N-ARY SUMMATION - 0x00b8: 0x220f, # N-ARY PRODUCT - 0x00b9: 0x03c0, # GREEK SMALL LETTER PI - 0x00ba: 0x222b, # INTEGRAL - 0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00bd: 0x2126, # OHM SIGN - 0x00be: 0x00e6, # LATIN SMALL LIGATURE AE - 0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x00c0: 0x00bf, # INVERTED QUESTION MARK - 0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00c2: 0x00ac, # NOT SIGN - 0x00c3: 0x221a, # SQUARE ROOT - 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00c5: 0x2248, # ALMOST EQUAL TO - 0x00c6: 0x2206, # INCREMENT - 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS - 0x00ca: 0x00a0, # NO-BREAK SPACE - 0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00ce: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE - 0x00d0: 0x2013, # EN DASH - 0x00d1: 0x2014, # EM DASH - 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00d6: 0x00f7, # DIVISION SIGN - 0x00d7: 0x25ca, # LOZENGE - 0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x00da: 0x2044, # FRACTION SLASH - 0x00db: 0x00a4, # CURRENCY SIGN - 0x00dc: 0x00d0, # LATIN CAPITAL LETTER ETH - 0x00dd: 0x00f0, # LATIN SMALL LETTER ETH - 0x00df: 0x00fe, # LATIN SMALL LETTER THORN - 0x00e0: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00e1: 0x00b7, # MIDDLE DOT - 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00e4: 0x2030, # PER MILLE SIGN - 0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00e8: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00f0: None, # UNDEFINED - 0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00f5: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x00f7: 0x02dc, # SMALL TILDE - 0x00f8: 0x00af, # MACRON - 0x00f9: 0x02d8, # BREVE - 0x00fa: 0x02d9, # DOT ABOVE - 0x00fb: 0x02da, # RING ABOVE - 0x00fc: 0x00b8, # CEDILLA - 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT - 0x00fe: 0x02db, # OGONEK - 0x00ff: 0x02c7, # CARON + 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x0093: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00a0: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00a1: 0x00b0, # DEGREE SIGN + 0x00a4: 0x00a7, # SECTION SIGN + 0x00a5: 0x2022, # BULLET + 0x00a6: 0x00b6, # PILCROW SIGN + 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00a8: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x2122, # TRADE MARK SIGN + 0x00ab: 0x00b4, # ACUTE ACCENT + 0x00ac: 0x00a8, # DIAERESIS + 0x00ad: 0x2260, # NOT EQUAL TO + 0x00ae: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00b0: 0x221e, # INFINITY + 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO + 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00b4: 0x00a5, # YEN SIGN + 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL + 0x00b7: 0x2211, # N-ARY SUMMATION + 0x00b8: 0x220f, # N-ARY PRODUCT + 0x00b9: 0x03c0, # GREEK SMALL LETTER PI + 0x00ba: 0x222b, # INTEGRAL + 0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00bd: 0x2126, # OHM SIGN + 0x00be: 0x00e6, # LATIN SMALL LIGATURE AE + 0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00c0: 0x00bf, # INVERTED QUESTION MARK + 0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00c2: 0x00ac, # NOT SIGN + 0x00c3: 0x221a, # SQUARE ROOT + 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00c5: 0x2248, # ALMOST EQUAL TO + 0x00c6: 0x2206, # INCREMENT + 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS + 0x00ca: 0x00a0, # NO-BREAK SPACE + 0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00ce: 0x0152, # LATIN CAPITAL LIGATURE OE + 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE + 0x00d0: 0x2013, # EN DASH + 0x00d1: 0x2014, # EM DASH + 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00d6: 0x00f7, # DIVISION SIGN + 0x00d7: 0x25ca, # LOZENGE + 0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x00da: 0x2044, # FRACTION SLASH + 0x00db: 0x00a4, # CURRENCY SIGN + 0x00dc: 0x00d0, # LATIN CAPITAL LETTER ETH + 0x00dd: 0x00f0, # LATIN SMALL LETTER ETH + 0x00df: 0x00fe, # LATIN SMALL LETTER THORN + 0x00e0: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00e1: 0x00b7, # MIDDLE DOT + 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x00e4: 0x2030, # PER MILLE SIGN + 0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00e8: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00f0: None, # UNDEFINED + 0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00f5: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x00f7: 0x02dc, # SMALL TILDE + 0x00f8: 0x00af, # MACRON + 0x00f9: 0x02d8, # BREVE + 0x00fa: 0x02d9, # DOT ABOVE + 0x00fb: 0x02da, # RING ABOVE + 0x00fc: 0x00b8, # CEDILLA + 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT + 0x00fe: 0x02db, # OGONEK + 0x00ff: 0x02c7, # CARON }) ### Encoding Map diff --git a/Lib/encodings/mac_latin2.py b/Lib/encodings/mac_latin2.py index e922178..f5d5225 100644 --- a/Lib/encodings/mac_latin2.py +++ b/Lib/encodings/mac_latin2.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,132 +37,132 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x0081: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON - 0x0082: 0x0101, # LATIN SMALL LETTER A WITH MACRON - 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0084: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x0088: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x0089: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x008b: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x008c: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x008d: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x008f: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x0090: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x0091: 0x010e, # LATIN CAPITAL LETTER D WITH CARON - 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x0093: 0x010f, # LATIN SMALL LETTER D WITH CARON - 0x0094: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON - 0x0095: 0x0113, # LATIN SMALL LETTER E WITH MACRON - 0x0096: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x0098: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x009d: 0x011a, # LATIN CAPITAL LETTER E WITH CARON - 0x009e: 0x011b, # LATIN SMALL LETTER E WITH CARON - 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00a0: 0x2020, # DAGGER - 0x00a1: 0x00b0, # DEGREE SIGN - 0x00a2: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00a4: 0x00a7, # SECTION SIGN - 0x00a5: 0x2022, # BULLET - 0x00a6: 0x00b6, # PILCROW SIGN - 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00a8: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x2122, # TRADE MARK SIGN - 0x00ab: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00ac: 0x00a8, # DIAERESIS - 0x00ad: 0x2260, # NOT EQUAL TO - 0x00ae: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA - 0x00af: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK - 0x00b0: 0x012f, # LATIN SMALL LETTER I WITH OGONEK - 0x00b1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON - 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO - 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00b4: 0x012b, # LATIN SMALL LETTER I WITH MACRON - 0x00b5: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL - 0x00b7: 0x2211, # N-ARY SUMMATION - 0x00b8: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x00b9: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x00ba: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA - 0x00bb: 0x013d, # LATIN CAPITAL LETTER L WITH CARON - 0x00bc: 0x013e, # LATIN SMALL LETTER L WITH CARON - 0x00bd: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE - 0x00be: 0x013a, # LATIN SMALL LETTER L WITH ACUTE - 0x00bf: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x00c0: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA - 0x00c1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00c2: 0x00ac, # NOT SIGN - 0x00c3: 0x221a, # SQUARE ROOT - 0x00c4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00c5: 0x0147, # LATIN CAPITAL LETTER N WITH CARON - 0x00c6: 0x2206, # INCREMENT - 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS - 0x00ca: 0x00a0, # NO-BREAK SPACE - 0x00cb: 0x0148, # LATIN SMALL LETTER N WITH CARON - 0x00cc: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00ce: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x00cf: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON - 0x00d0: 0x2013, # EN DASH - 0x00d1: 0x2014, # EM DASH - 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00d6: 0x00f7, # DIVISION SIGN - 0x00d7: 0x25ca, # LOZENGE - 0x00d8: 0x014d, # LATIN SMALL LETTER O WITH MACRON - 0x00d9: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE - 0x00da: 0x0155, # LATIN SMALL LETTER R WITH ACUTE - 0x00db: 0x0158, # LATIN CAPITAL LETTER R WITH CARON - 0x00dc: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x00dd: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x00de: 0x0159, # LATIN SMALL LETTER R WITH CARON - 0x00df: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x00e0: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA - 0x00e1: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00e4: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00e5: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x00e6: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00e8: 0x0164, # LATIN CAPITAL LETTER T WITH CARON - 0x00e9: 0x0165, # LATIN SMALL LETTER T WITH CARON - 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00eb: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00ec: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00ed: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON - 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00f0: 0x016b, # LATIN SMALL LETTER U WITH MACRON - 0x00f1: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00f3: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE - 0x00f4: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x00f5: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x00f6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK - 0x00f7: 0x0173, # LATIN SMALL LETTER U WITH OGONEK - 0x00f8: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00f9: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00fa: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA - 0x00fb: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00fc: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00fe: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x00ff: 0x02c7, # CARON + 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x0081: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON + 0x0082: 0x0101, # LATIN SMALL LETTER A WITH MACRON + 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0084: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x0088: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x0089: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x008b: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x008c: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x008d: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x008f: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x0090: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x0091: 0x010e, # LATIN CAPITAL LETTER D WITH CARON + 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x0093: 0x010f, # LATIN SMALL LETTER D WITH CARON + 0x0094: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON + 0x0095: 0x0113, # LATIN SMALL LETTER E WITH MACRON + 0x0096: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x0098: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x009d: 0x011a, # LATIN CAPITAL LETTER E WITH CARON + 0x009e: 0x011b, # LATIN SMALL LETTER E WITH CARON + 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00a0: 0x2020, # DAGGER + 0x00a1: 0x00b0, # DEGREE SIGN + 0x00a2: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00a4: 0x00a7, # SECTION SIGN + 0x00a5: 0x2022, # BULLET + 0x00a6: 0x00b6, # PILCROW SIGN + 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00a8: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x2122, # TRADE MARK SIGN + 0x00ab: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00ac: 0x00a8, # DIAERESIS + 0x00ad: 0x2260, # NOT EQUAL TO + 0x00ae: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA + 0x00af: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK + 0x00b0: 0x012f, # LATIN SMALL LETTER I WITH OGONEK + 0x00b1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON + 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO + 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00b4: 0x012b, # LATIN SMALL LETTER I WITH MACRON + 0x00b5: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL + 0x00b7: 0x2211, # N-ARY SUMMATION + 0x00b8: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x00b9: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x00ba: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA + 0x00bb: 0x013d, # LATIN CAPITAL LETTER L WITH CARON + 0x00bc: 0x013e, # LATIN SMALL LETTER L WITH CARON + 0x00bd: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE + 0x00be: 0x013a, # LATIN SMALL LETTER L WITH ACUTE + 0x00bf: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x00c0: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA + 0x00c1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00c2: 0x00ac, # NOT SIGN + 0x00c3: 0x221a, # SQUARE ROOT + 0x00c4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00c5: 0x0147, # LATIN CAPITAL LETTER N WITH CARON + 0x00c6: 0x2206, # INCREMENT + 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS + 0x00ca: 0x00a0, # NO-BREAK SPACE + 0x00cb: 0x0148, # LATIN SMALL LETTER N WITH CARON + 0x00cc: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00ce: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x00cf: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON + 0x00d0: 0x2013, # EN DASH + 0x00d1: 0x2014, # EM DASH + 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00d6: 0x00f7, # DIVISION SIGN + 0x00d7: 0x25ca, # LOZENGE + 0x00d8: 0x014d, # LATIN SMALL LETTER O WITH MACRON + 0x00d9: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE + 0x00da: 0x0155, # LATIN SMALL LETTER R WITH ACUTE + 0x00db: 0x0158, # LATIN CAPITAL LETTER R WITH CARON + 0x00dc: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x00dd: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x00de: 0x0159, # LATIN SMALL LETTER R WITH CARON + 0x00df: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x00e0: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA + 0x00e1: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x00e4: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00e5: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x00e6: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00e8: 0x0164, # LATIN CAPITAL LETTER T WITH CARON + 0x00e9: 0x0165, # LATIN SMALL LETTER T WITH CARON + 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00eb: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00ec: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00ed: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON + 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00f0: 0x016b, # LATIN SMALL LETTER U WITH MACRON + 0x00f1: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00f3: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE + 0x00f4: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x00f5: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x00f6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK + 0x00f7: 0x0173, # LATIN SMALL LETTER U WITH OGONEK + 0x00f8: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00f9: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fa: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA + 0x00fb: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00fc: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE + 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00fe: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x00ff: 0x02c7, # CARON }) ### Encoding Map diff --git a/Lib/encodings/mac_roman.py b/Lib/encodings/mac_roman.py index 23dca89..3f02ab1 100644 --- a/Lib/encodings/mac_roman.py +++ b/Lib/encodings/mac_roman.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,129 +37,129 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x0093: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00a0: 0x2020, # DAGGER - 0x00a1: 0x00b0, # DEGREE SIGN - 0x00a4: 0x00a7, # SECTION SIGN - 0x00a5: 0x2022, # BULLET - 0x00a6: 0x00b6, # PILCROW SIGN - 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00a8: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x2122, # TRADE MARK SIGN - 0x00ab: 0x00b4, # ACUTE ACCENT - 0x00ac: 0x00a8, # DIAERESIS - 0x00ad: 0x2260, # NOT EQUAL TO - 0x00ae: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00b0: 0x221e, # INFINITY - 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO - 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00b4: 0x00a5, # YEN SIGN - 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL - 0x00b7: 0x2211, # N-ARY SUMMATION - 0x00b8: 0x220f, # N-ARY PRODUCT - 0x00b9: 0x03c0, # GREEK SMALL LETTER PI - 0x00ba: 0x222b, # INTEGRAL - 0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00bd: 0x2126, # OHM SIGN - 0x00be: 0x00e6, # LATIN SMALL LIGATURE AE - 0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x00c0: 0x00bf, # INVERTED QUESTION MARK - 0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00c2: 0x00ac, # NOT SIGN - 0x00c3: 0x221a, # SQUARE ROOT - 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00c5: 0x2248, # ALMOST EQUAL TO - 0x00c6: 0x2206, # INCREMENT - 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS - 0x00ca: 0x00a0, # NO-BREAK SPACE - 0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00ce: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE - 0x00d0: 0x2013, # EN DASH - 0x00d1: 0x2014, # EM DASH - 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00d6: 0x00f7, # DIVISION SIGN - 0x00d7: 0x25ca, # LOZENGE - 0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x00da: 0x2044, # FRACTION SLASH - 0x00db: 0x00a4, # CURRENCY SIGN - 0x00dc: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x00dd: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x00de: 0xfb01, # LATIN SMALL LIGATURE FI - 0x00df: 0xfb02, # LATIN SMALL LIGATURE FL - 0x00e0: 0x2021, # DOUBLE DAGGER - 0x00e1: 0x00b7, # MIDDLE DOT - 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00e4: 0x2030, # PER MILLE SIGN - 0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00e8: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00f0: None, # UNDEFINED - 0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00f5: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x00f7: 0x02dc, # SMALL TILDE - 0x00f8: 0x00af, # MACRON - 0x00f9: 0x02d8, # BREVE - 0x00fa: 0x02d9, # DOT ABOVE - 0x00fb: 0x02da, # RING ABOVE - 0x00fc: 0x00b8, # CEDILLA - 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT - 0x00fe: 0x02db, # OGONEK - 0x00ff: 0x02c7, # CARON + 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x0093: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00a0: 0x2020, # DAGGER + 0x00a1: 0x00b0, # DEGREE SIGN + 0x00a4: 0x00a7, # SECTION SIGN + 0x00a5: 0x2022, # BULLET + 0x00a6: 0x00b6, # PILCROW SIGN + 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00a8: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x2122, # TRADE MARK SIGN + 0x00ab: 0x00b4, # ACUTE ACCENT + 0x00ac: 0x00a8, # DIAERESIS + 0x00ad: 0x2260, # NOT EQUAL TO + 0x00ae: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00b0: 0x221e, # INFINITY + 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO + 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00b4: 0x00a5, # YEN SIGN + 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL + 0x00b7: 0x2211, # N-ARY SUMMATION + 0x00b8: 0x220f, # N-ARY PRODUCT + 0x00b9: 0x03c0, # GREEK SMALL LETTER PI + 0x00ba: 0x222b, # INTEGRAL + 0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00bd: 0x2126, # OHM SIGN + 0x00be: 0x00e6, # LATIN SMALL LIGATURE AE + 0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00c0: 0x00bf, # INVERTED QUESTION MARK + 0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00c2: 0x00ac, # NOT SIGN + 0x00c3: 0x221a, # SQUARE ROOT + 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00c5: 0x2248, # ALMOST EQUAL TO + 0x00c6: 0x2206, # INCREMENT + 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS + 0x00ca: 0x00a0, # NO-BREAK SPACE + 0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00ce: 0x0152, # LATIN CAPITAL LIGATURE OE + 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE + 0x00d0: 0x2013, # EN DASH + 0x00d1: 0x2014, # EM DASH + 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00d6: 0x00f7, # DIVISION SIGN + 0x00d7: 0x25ca, # LOZENGE + 0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x00da: 0x2044, # FRACTION SLASH + 0x00db: 0x00a4, # CURRENCY SIGN + 0x00dc: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x00dd: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x00de: 0xfb01, # LATIN SMALL LIGATURE FI + 0x00df: 0xfb02, # LATIN SMALL LIGATURE FL + 0x00e0: 0x2021, # DOUBLE DAGGER + 0x00e1: 0x00b7, # MIDDLE DOT + 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x00e4: 0x2030, # PER MILLE SIGN + 0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00e8: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00f0: None, # UNDEFINED + 0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00f5: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x00f7: 0x02dc, # SMALL TILDE + 0x00f8: 0x00af, # MACRON + 0x00f9: 0x02d8, # BREVE + 0x00fa: 0x02d9, # DOT ABOVE + 0x00fb: 0x02da, # RING ABOVE + 0x00fc: 0x00b8, # CEDILLA + 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT + 0x00fe: 0x02db, # OGONEK + 0x00ff: 0x02c7, # CARON }) ### Encoding Map diff --git a/Lib/encodings/mac_turkish.py b/Lib/encodings/mac_turkish.py index c71268b..7f66f50 100644 --- a/Lib/encodings/mac_turkish.py +++ b/Lib/encodings/mac_turkish.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -37,129 +37,129 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x0093: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00a0: 0x2020, # DAGGER - 0x00a1: 0x00b0, # DEGREE SIGN - 0x00a4: 0x00a7, # SECTION SIGN - 0x00a5: 0x2022, # BULLET - 0x00a6: 0x00b6, # PILCROW SIGN - 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00a8: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x2122, # TRADE MARK SIGN - 0x00ab: 0x00b4, # ACUTE ACCENT - 0x00ac: 0x00a8, # DIAERESIS - 0x00ad: 0x2260, # NOT EQUAL TO - 0x00ae: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00b0: 0x221e, # INFINITY - 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO - 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00b4: 0x00a5, # YEN SIGN - 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL - 0x00b7: 0x2211, # N-ARY SUMMATION - 0x00b8: 0x220f, # N-ARY PRODUCT - 0x00b9: 0x03c0, # GREEK SMALL LETTER PI - 0x00ba: 0x222b, # INTEGRAL - 0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00bd: 0x2126, # OHM SIGN - 0x00be: 0x00e6, # LATIN SMALL LIGATURE AE - 0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x00c0: 0x00bf, # INVERTED QUESTION MARK - 0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00c2: 0x00ac, # NOT SIGN - 0x00c3: 0x221a, # SQUARE ROOT - 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00c5: 0x2248, # ALMOST EQUAL TO - 0x00c6: 0x2206, # INCREMENT - 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS - 0x00ca: 0x00a0, # NO-BREAK SPACE - 0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00ce: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE - 0x00d0: 0x2013, # EN DASH - 0x00d1: 0x2014, # EM DASH - 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00d6: 0x00f7, # DIVISION SIGN - 0x00d7: 0x25ca, # LOZENGE - 0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x00da: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE - 0x00db: 0x011f, # LATIN SMALL LETTER G WITH BREVE - 0x00dc: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x00dd: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x00df: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA - 0x00e0: 0x2021, # DOUBLE DAGGER - 0x00e1: 0x00b7, # MIDDLE DOT - 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00e4: 0x2030, # PER MILLE SIGN - 0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00e8: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00f0: None, # UNDEFINED - 0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00f5: None, # UNDEFINED - 0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x00f7: 0x02dc, # SMALL TILDE - 0x00f8: 0x00af, # MACRON - 0x00f9: 0x02d8, # BREVE - 0x00fa: 0x02d9, # DOT ABOVE - 0x00fb: 0x02da, # RING ABOVE - 0x00fc: 0x00b8, # CEDILLA - 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT - 0x00fe: 0x02db, # OGONEK - 0x00ff: 0x02c7, # CARON + 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x0093: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00a0: 0x2020, # DAGGER + 0x00a1: 0x00b0, # DEGREE SIGN + 0x00a4: 0x00a7, # SECTION SIGN + 0x00a5: 0x2022, # BULLET + 0x00a6: 0x00b6, # PILCROW SIGN + 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00a8: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x2122, # TRADE MARK SIGN + 0x00ab: 0x00b4, # ACUTE ACCENT + 0x00ac: 0x00a8, # DIAERESIS + 0x00ad: 0x2260, # NOT EQUAL TO + 0x00ae: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00b0: 0x221e, # INFINITY + 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO + 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00b4: 0x00a5, # YEN SIGN + 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL + 0x00b7: 0x2211, # N-ARY SUMMATION + 0x00b8: 0x220f, # N-ARY PRODUCT + 0x00b9: 0x03c0, # GREEK SMALL LETTER PI + 0x00ba: 0x222b, # INTEGRAL + 0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00bd: 0x2126, # OHM SIGN + 0x00be: 0x00e6, # LATIN SMALL LIGATURE AE + 0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00c0: 0x00bf, # INVERTED QUESTION MARK + 0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00c2: 0x00ac, # NOT SIGN + 0x00c3: 0x221a, # SQUARE ROOT + 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00c5: 0x2248, # ALMOST EQUAL TO + 0x00c6: 0x2206, # INCREMENT + 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS + 0x00ca: 0x00a0, # NO-BREAK SPACE + 0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00ce: 0x0152, # LATIN CAPITAL LIGATURE OE + 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE + 0x00d0: 0x2013, # EN DASH + 0x00d1: 0x2014, # EM DASH + 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00d6: 0x00f7, # DIVISION SIGN + 0x00d7: 0x25ca, # LOZENGE + 0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x00da: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE + 0x00db: 0x011f, # LATIN SMALL LETTER G WITH BREVE + 0x00dc: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x00dd: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x00df: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00e0: 0x2021, # DOUBLE DAGGER + 0x00e1: 0x00b7, # MIDDLE DOT + 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x00e4: 0x2030, # PER MILLE SIGN + 0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00e8: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00f0: None, # UNDEFINED + 0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00f5: None, # UNDEFINED + 0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x00f7: 0x02dc, # SMALL TILDE + 0x00f8: 0x00af, # MACRON + 0x00f9: 0x02d8, # BREVE + 0x00fa: 0x02d9, # DOT ABOVE + 0x00fb: 0x02da, # RING ABOVE + 0x00fc: 0x00b8, # CEDILLA + 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT + 0x00fe: 0x02db, # OGONEK + 0x00ff: 0x02c7, # CARON }) ### Encoding Map diff --git a/Lib/encodings/mbcs.py b/Lib/encodings/mbcs.py index 5103980..c79f47c 100644 --- a/Lib/encodings/mbcs.py +++ b/Lib/encodings/mbcs.py @@ -20,7 +20,7 @@ class Codec(codecs.Codec): class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass diff --git a/Lib/encodings/palmos.py b/Lib/encodings/palmos.py index 735d738..c0f0606 100644 --- a/Lib/encodings/palmos.py +++ b/Lib/encodings/palmos.py @@ -11,13 +11,13 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass diff --git a/Lib/encodings/raw_unicode_escape.py b/Lib/encodings/raw_unicode_escape.py index e142846..a2f3fff 100644 --- a/Lib/encodings/raw_unicode_escape.py +++ b/Lib/encodings/raw_unicode_escape.py @@ -19,7 +19,7 @@ class Codec(codecs.Codec): class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass diff --git a/Lib/encodings/rot_13.py b/Lib/encodings/rot_13.py index 532ff64..d5c91da 100644 --- a/Lib/encodings/rot_13.py +++ b/Lib/encodings/rot_13.py @@ -16,14 +16,14 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) - + def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass diff --git a/Lib/encodings/undefined.py b/Lib/encodings/undefined.py index 7de993c..d2277ac 100644 --- a/Lib/encodings/undefined.py +++ b/Lib/encodings/undefined.py @@ -23,7 +23,7 @@ class Codec(codecs.Codec): class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass diff --git a/Lib/encodings/unicode_escape.py b/Lib/encodings/unicode_escape.py index 841651b..8fb6293 100644 --- a/Lib/encodings/unicode_escape.py +++ b/Lib/encodings/unicode_escape.py @@ -19,7 +19,7 @@ class Codec(codecs.Codec): class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass diff --git a/Lib/encodings/unicode_internal.py b/Lib/encodings/unicode_internal.py index 4a0f4c1..3bd2fa0 100644 --- a/Lib/encodings/unicode_internal.py +++ b/Lib/encodings/unicode_internal.py @@ -19,7 +19,7 @@ class Codec(codecs.Codec): class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass diff --git a/Lib/encodings/utf_16.py b/Lib/encodings/utf_16.py index 2c2638f..8c79c79 100644 --- a/Lib/encodings/utf_16.py +++ b/Lib/encodings/utf_16.py @@ -31,7 +31,7 @@ class StreamWriter(Codec,codecs.StreamWriter): else: self.encode = codecs.utf_16_be_encode return result - + class StreamReader(Codec,codecs.StreamReader): def __init__(self, stream, errors='strict'): self.bom_read = 0 @@ -61,4 +61,3 @@ class StreamReader(Codec,codecs.StreamReader): def getregentry(): return (Codec.encode,Codec.decode,StreamReader,StreamWriter) - diff --git a/Lib/encodings/utf_16_be.py b/Lib/encodings/utf_16_be.py index 63ac608..dad540b 100644 --- a/Lib/encodings/utf_16_be.py +++ b/Lib/encodings/utf_16_be.py @@ -19,7 +19,7 @@ class Codec(codecs.Codec): class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): def readline(self, size=None): @@ -30,4 +30,3 @@ class StreamReader(Codec,codecs.StreamReader): def getregentry(): return (Codec.encode,Codec.decode,StreamReader,StreamWriter) - diff --git a/Lib/encodings/utf_16_le.py b/Lib/encodings/utf_16_le.py index aa9d6f5..8120d5b 100644 --- a/Lib/encodings/utf_16_le.py +++ b/Lib/encodings/utf_16_le.py @@ -19,7 +19,7 @@ class Codec(codecs.Codec): class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): def readline(self, size=None): @@ -30,4 +30,3 @@ class StreamReader(Codec,codecs.StreamReader): def getregentry(): return (Codec.encode,Codec.decode,StreamReader,StreamWriter) - diff --git a/Lib/encodings/utf_7.py b/Lib/encodings/utf_7.py index 441a7f7..ee78d09 100644 --- a/Lib/encodings/utf_7.py +++ b/Lib/encodings/utf_7.py @@ -15,7 +15,7 @@ class Codec(codecs.Codec): class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -24,4 +24,3 @@ class StreamReader(Codec,codecs.StreamReader): def getregentry(): return (Codec.encode,Codec.decode,StreamReader,StreamWriter) - diff --git a/Lib/encodings/utf_8.py b/Lib/encodings/utf_8.py index a745f5b..89249a9 100644 --- a/Lib/encodings/utf_8.py +++ b/Lib/encodings/utf_8.py @@ -19,7 +19,7 @@ class Codec(codecs.Codec): class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass @@ -28,4 +28,3 @@ class StreamReader(Codec,codecs.StreamReader): def getregentry(): return (Codec.encode,Codec.decode,StreamReader,StreamWriter) - diff --git a/Lib/encodings/uu_codec.py b/Lib/encodings/uu_codec.py index 6ef8369..a70ff9e 100644 --- a/Lib/encodings/uu_codec.py +++ b/Lib/encodings/uu_codec.py @@ -37,7 +37,7 @@ def uu_encode(input,errors='strict',filename='<data>',mode=0666): write(b2a_uu(chunk)) chunk = read(45) write(' \nend\n') - + return (outfile.getvalue(), len(input)) def uu_decode(input,errors='strict'): @@ -98,10 +98,10 @@ class Codec(codecs.Codec): return uu_encode(input,errors) def decode(self,input,errors='strict'): return uu_decode(input,errors) - + class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass diff --git a/Lib/encodings/zlib_codec.py b/Lib/encodings/zlib_codec.py index d9f7d04..9b6e4d1 100644 --- a/Lib/encodings/zlib_codec.py +++ b/Lib/encodings/zlib_codec.py @@ -52,7 +52,7 @@ class Codec(codecs.Codec): class StreamWriter(Codec,codecs.StreamWriter): pass - + class StreamReader(Codec,codecs.StreamReader): pass diff --git a/Lib/heapq.py b/Lib/heapq.py index 4970437..698e6fe 100644 --- a/Lib/heapq.py +++ b/Lib/heapq.py @@ -231,7 +231,7 @@ def _siftup(heap, pos): # Set childpos to index of smaller child. rightpos = childpos + 1 if rightpos < endpos and heap[rightpos] <= heap[childpos]: - childpos = rightpos + childpos = rightpos # Move the smaller child up. heap[pos] = heap[childpos] pos = childpos diff --git a/Lib/httplib.py b/Lib/httplib.py index ef131d3..e58a30a 100644 --- a/Lib/httplib.py +++ b/Lib/httplib.py @@ -560,7 +560,7 @@ class HTTPConnection: def _output(self, s): """Add a line of output to the current request buffer. - + Assumes that the line does *not* end with \\r\\n. """ self._buffer.append(s) diff --git a/Lib/smtplib.py b/Lib/smtplib.py index 3699178..a6f113c 100755 --- a/Lib/smtplib.py +++ b/Lib/smtplib.py @@ -530,7 +530,7 @@ class SMTP: def encode_plain(user, password): return encode_base64("%s\0%s\0%s" % (user, user, password), eol="") - + AUTH_PLAIN = "PLAIN" AUTH_CRAM_MD5 = "CRAM-MD5" diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index 836836b1..c8feb19 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -314,4 +314,3 @@ def run_contains_tests(test): vereq('asdf' in 'asdf', True) vereq('asdf' in 'asd', False) vereq('asdf' in '', False) - diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py index 3013a08..be120fb 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -221,4 +221,3 @@ if have_unicode: test_exc('%d', '1', TypeError, "int argument required") test_exc('%g', '1', TypeError, "float argument required") - diff --git a/Lib/test/test_slice.py b/Lib/test/test_slice.py index a837abe..7b3ee06 100644 --- a/Lib/test/test_slice.py +++ b/Lib/test/test_slice.py @@ -11,4 +11,3 @@ vereq(slice(3, None, -2).indices(10), (3, -1, -2)) vereq(slice(-100, 100 ).indices(10), slice(None).indices(10)) vereq(slice(100, -100, -1).indices(10), slice(None, None, -1).indices(10)) vereq(slice(-100L, 100L, 2L).indices(10), (0, 10, 2)) - diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py index 98d9e78..69113fd 100644 --- a/Lib/test/test_socket.py +++ b/Lib/test/test_socket.py @@ -571,7 +571,7 @@ class FileObjectClassTestCase(SocketConnectedTest): class UnbufferedFileObjectClassTestCase(FileObjectClassTestCase): """Repeat the tests from FileObjectClassTestCase with bufsize==0. - + In this case (and in this case only), it should be possible to create a file object, read a line from it, create another file object, read another line from it, without loss of data in the diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index 2e8e35e..bd84021 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -20,7 +20,7 @@ class LocaleTime_Tests(unittest.TestCase): self.LT_ins = _strptime.LocaleTime() def compare_against_time(self, testing, directive, tuple_position, error_msg): - """Helper method that tests testing against directive based on the + """Helper method that tests testing against directive based on the tuple_position of time_tuple. Uses error_msg as error message. """ @@ -28,17 +28,17 @@ class LocaleTime_Tests(unittest.TestCase): comparison = testing[self.time_tuple[tuple_position]] self.failUnless(strftime_output in testing, "%s: not found in tuple" % error_msg) self.failUnless(comparison == strftime_output, "%s: position within tuple incorrect; %s != %s" % (error_msg, comparison, strftime_output)) - + def test_weekday(self): - """Make sure that full and abbreviated weekday names are correct in + """Make sure that full and abbreviated weekday names are correct in both string and position with tuple. - + """ self.compare_against_time(self.LT_ins.f_weekday, '%A', 6, "Testing of full weekday name failed") self.compare_against_time(self.LT_ins.a_weekday, '%a', 6, "Testing of abbreviated weekday name failed") def test_month(self): - """Test full and abbreviated month names; both string and position + """Test full and abbreviated month names; both string and position within the tuple. """ @@ -125,9 +125,9 @@ class TimeRETests(unittest.TestCase): self.failUnless(found and found.group('A') == self.locale_time.f_weekday[6], "re object for '%A' failed") compiled = self.time_re.compile(r"%a %b") found = compiled.match("%s %s" % (self.locale_time.a_weekday[4], self.locale_time.a_month[4])) - self.failUnless(found, + self.failUnless(found, "Match failed with '%s' regex and '%s' string" % (compiled.pattern, "%s %s" % (self.locale_time.a_weekday[4], self.locale_time.a_month[4]))) - self.failUnless(found.group('a') == self.locale_time.a_weekday[4] and found.group('b') == self.locale_time.a_month[4], + self.failUnless(found.group('a') == self.locale_time.a_weekday[4] and found.group('b') == self.locale_time.a_month[4], "re object couldn't find the abbreviated weekday month in '%s' using '%s'; group 'a' = '%s', group 'b' = %s'" % (found.string, found.re.pattern, found.group('a'), found.group('b'))) for directive in ('a','A','b','B','c','d','H','I','j','m','M','p','S','U','w','W','x','X','y','Y','Z','%'): compiled = self.time_re.compile("%%%s"% directive) @@ -199,9 +199,9 @@ class StrptimeTests(unittest.TestCase): def test_timezone(self): """Test timezone directives. - + When gmtime() is used with %Z, entire result of strftime() is empty. - + """ time_tuple = time.localtime() strf_output = time.strftime("%Z") #UTC does not have a timezone @@ -246,7 +246,7 @@ class FxnTests(unittest.TestCase): strf_output = time.strftime("%Y-%m-%d", self.time_tuple) strp_output = _strptime.strptime(strf_output, "%Y-%m-%d") self.failUnless(strp_output[7] == self.time_tuple[7], "strptime did not trigger julianday(); %s != %s" % (strp_output[7], self.time_tuple[7])) - + def test_gregorian_result(self): """Test gregorian.""" result = _strptime.gregorian(self.time_tuple[7], self.time_tuple[0]) diff --git a/Lib/test/test_whichdb.py b/Lib/test/test_whichdb.py index 8f63862..21f1588 100644 --- a/Lib/test/test_whichdb.py +++ b/Lib/test/test_whichdb.py @@ -41,7 +41,7 @@ for name in anydbm._names: mod = __import__(name) except ImportError: continue - + def test_whichdb_name(self,name=name,mod=mod): """Check whether whichdb correctly guesses module name for databases opened with module mod. @@ -60,4 +60,3 @@ def test_main(): if __name__ == "__main__": test_main() - |