diff options
author | Marc-André Lemburg <mal@egenix.com> | 2005-10-21 13:49:12 (GMT) |
---|---|---|
committer | Marc-André Lemburg <mal@egenix.com> | 2005-10-21 13:49:12 (GMT) |
commit | a1129f4b9bdf655e8b5b9565b87ead5f32f4b574 (patch) | |
tree | db7cf3de8872e250c8c9153a1b0dec559f869ecb /Lib/encodings | |
parent | 92b201debcb7c796a68df372f3660e02b2c85d40 (diff) | |
download | cpython-a1129f4b9bdf655e8b5b9565b87ead5f32f4b574.zip cpython-a1129f4b9bdf655e8b5b9565b87ead5f32f4b574.tar.gz cpython-a1129f4b9bdf655e8b5b9565b87ead5f32f4b574.tar.bz2 |
Replace the old charmap codecs with new ones generated from the current
mapping tables available at ftp.unicode.org.
These new codecs include and use character decoding tables, which makes
decoding several times faster.
Diffstat (limited to 'Lib/encodings')
49 files changed, 29964 insertions, 5129 deletions
diff --git a/Lib/encodings/cp1006.py b/Lib/encodings/cp1006.py index bbd3d87..93f2688 100644 --- a/Lib/encodings/cp1006.py +++ b/Lib/encodings/cp1006.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP1006.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MISC/CP1006.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,102 +32,619 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x06f0, # EXTENDED ARABIC-INDIC DIGIT ZERO - 0x00a2: 0x06f1, # EXTENDED ARABIC-INDIC DIGIT ONE - 0x00a3: 0x06f2, # EXTENDED ARABIC-INDIC DIGIT TWO - 0x00a4: 0x06f3, # EXTENDED ARABIC-INDIC DIGIT THREE - 0x00a5: 0x06f4, # EXTENDED ARABIC-INDIC DIGIT FOUR - 0x00a6: 0x06f5, # EXTENDED ARABIC-INDIC DIGIT FIVE - 0x00a7: 0x06f6, # EXTENDED ARABIC-INDIC DIGIT SIX - 0x00a8: 0x06f7, # EXTENDED ARABIC-INDIC DIGIT SEVEN - 0x00a9: 0x06f8, # EXTENDED ARABIC-INDIC DIGIT EIGHT - 0x00aa: 0x06f9, # EXTENDED ARABIC-INDIC DIGIT NINE - 0x00ab: 0x060c, # ARABIC COMMA - 0x00ac: 0x061b, # ARABIC SEMICOLON - 0x00ae: 0x061f, # ARABIC QUESTION MARK - 0x00af: 0xfe81, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM - 0x00b0: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM - 0x00b1: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM - 0x00b2: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM - 0x00b3: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM - 0x00b4: 0xfe91, # ARABIC LETTER BEH INITIAL FORM - 0x00b5: 0xfb56, # ARABIC LETTER PEH ISOLATED FORM - 0x00b6: 0xfb58, # ARABIC LETTER PEH INITIAL FORM - 0x00b7: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED 
FORM - 0x00b8: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM - 0x00b9: 0xfe97, # ARABIC LETTER TEH INITIAL FORM - 0x00ba: 0xfb66, # ARABIC LETTER TTEH ISOLATED FORM - 0x00bb: 0xfb68, # ARABIC LETTER TTEH INITIAL FORM - 0x00bc: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM - 0x00bd: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM - 0x00be: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM - 0x00bf: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM - 0x00c0: 0xfb7a, # ARABIC LETTER TCHEH ISOLATED FORM - 0x00c1: 0xfb7c, # ARABIC LETTER TCHEH INITIAL FORM - 0x00c2: 0xfea1, # ARABIC LETTER HAH ISOLATED FORM - 0x00c3: 0xfea3, # ARABIC LETTER HAH INITIAL FORM - 0x00c4: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM - 0x00c5: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM - 0x00c6: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM - 0x00c7: 0xfb84, # ARABIC LETTER DAHAL ISOLATED FORMN - 0x00c8: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM - 0x00c9: 0xfead, # ARABIC LETTER REH ISOLATED FORM - 0x00ca: 0xfb8c, # ARABIC LETTER RREH ISOLATED FORM - 0x00cb: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM - 0x00cc: 0xfb8a, # ARABIC LETTER JEH ISOLATED FORM - 0x00cd: 0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM - 0x00ce: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM - 0x00cf: 0xfeb5, # ARABIC LETTER SHEEN ISOLATED FORM - 0x00d0: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM - 0x00d1: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM - 0x00d2: 0xfebb, # ARABIC LETTER SAD INITIAL FORM - 0x00d3: 0xfebd, # ARABIC LETTER DAD ISOLATED FORM - 0x00d4: 0xfebf, # ARABIC LETTER DAD INITIAL FORM - 0x00d5: 0xfec1, # ARABIC LETTER TAH ISOLATED FORM - 0x00d6: 0xfec5, # ARABIC LETTER ZAH ISOLATED FORM - 0x00d7: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM - 0x00d8: 0xfeca, # ARABIC LETTER AIN FINAL FORM - 0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM - 0x00da: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM - 0x00db: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM - 0x00dc: 0xfece, # ARABIC LETTER GHAIN FINAL FORM - 0x00dd: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM - 0x00de: 
0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM - 0x00df: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM - 0x00e0: 0xfed3, # ARABIC LETTER FEH INITIAL FORM - 0x00e1: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM - 0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM - 0x00e3: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM - 0x00e4: 0xfedb, # ARABIC LETTER KAF INITIAL FORM - 0x00e5: 0xfb92, # ARABIC LETTER GAF ISOLATED FORM - 0x00e6: 0xfb94, # ARABIC LETTER GAF INITIAL FORM - 0x00e7: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM - 0x00e8: 0xfedf, # ARABIC LETTER LAM INITIAL FORM - 0x00e9: 0xfee0, # ARABIC LETTER LAM MEDIAL FORM - 0x00ea: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM - 0x00eb: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM - 0x00ec: 0xfb9e, # ARABIC LETTER NOON GHUNNA ISOLATED FORM - 0x00ed: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM - 0x00ee: 0xfee7, # ARABIC LETTER NOON INITIAL FORM - 0x00ef: 0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM - 0x00f0: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM - 0x00f1: 0xfba6, # ARABIC LETTER HEH GOAL ISOLATED FORM - 0x00f2: 0xfba8, # ARABIC LETTER HEH GOAL INITIAL FORM - 0x00f3: 0xfba9, # ARABIC LETTER HEH GOAL MEDIAL FORM - 0x00f4: 0xfbaa, # ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM - 0x00f5: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM - 0x00f6: 0xfe89, # ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM - 0x00f7: 0xfe8a, # ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM - 0x00f8: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM - 0x00f9: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM - 0x00fa: 0xfef2, # ARABIC LETTER YEH FINAL FORM - 0x00fb: 0xfef3, # ARABIC LETTER YEH INITIAL FORM - 0x00fc: 0xfbb0, # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM - 0x00fd: 0xfbae, # ARABIC LETTER YEH BARREE ISOLATED FORM - 0x00fe: 0xfe7c, # ARABIC SHADDA ISOLATED FORM - 0x00ff: 0xfe7d, # ARABIC SHADDA MEDIAL FORM + 0x00a1: 0x06f0, # EXTENDED ARABIC-INDIC DIGIT ZERO + 0x00a2: 0x06f1, # EXTENDED ARABIC-INDIC DIGIT ONE + 0x00a3: 0x06f2, # 
EXTENDED ARABIC-INDIC DIGIT TWO + 0x00a4: 0x06f3, # EXTENDED ARABIC-INDIC DIGIT THREE + 0x00a5: 0x06f4, # EXTENDED ARABIC-INDIC DIGIT FOUR + 0x00a6: 0x06f5, # EXTENDED ARABIC-INDIC DIGIT FIVE + 0x00a7: 0x06f6, # EXTENDED ARABIC-INDIC DIGIT SIX + 0x00a8: 0x06f7, # EXTENDED ARABIC-INDIC DIGIT SEVEN + 0x00a9: 0x06f8, # EXTENDED ARABIC-INDIC DIGIT EIGHT + 0x00aa: 0x06f9, # EXTENDED ARABIC-INDIC DIGIT NINE + 0x00ab: 0x060c, # ARABIC COMMA + 0x00ac: 0x061b, # ARABIC SEMICOLON + 0x00ae: 0x061f, # ARABIC QUESTION MARK + 0x00af: 0xfe81, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + 0x00b0: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM + 0x00b1: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM + 0x00b2: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM + 0x00b3: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM + 0x00b4: 0xfe91, # ARABIC LETTER BEH INITIAL FORM + 0x00b5: 0xfb56, # ARABIC LETTER PEH ISOLATED FORM + 0x00b6: 0xfb58, # ARABIC LETTER PEH INITIAL FORM + 0x00b7: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED FORM + 0x00b8: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM + 0x00b9: 0xfe97, # ARABIC LETTER TEH INITIAL FORM + 0x00ba: 0xfb66, # ARABIC LETTER TTEH ISOLATED FORM + 0x00bb: 0xfb68, # ARABIC LETTER TTEH INITIAL FORM + 0x00bc: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM + 0x00bd: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM + 0x00be: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM + 0x00bf: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM + 0x00c0: 0xfb7a, # ARABIC LETTER TCHEH ISOLATED FORM + 0x00c1: 0xfb7c, # ARABIC LETTER TCHEH INITIAL FORM + 0x00c2: 0xfea1, # ARABIC LETTER HAH ISOLATED FORM + 0x00c3: 0xfea3, # ARABIC LETTER HAH INITIAL FORM + 0x00c4: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM + 0x00c5: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM + 0x00c6: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM + 0x00c7: 0xfb84, # ARABIC LETTER DAHAL ISOLATED FORMN + 0x00c8: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM + 0x00c9: 0xfead, # ARABIC LETTER REH ISOLATED FORM + 0x00ca: 0xfb8c, # ARABIC LETTER RREH ISOLATED 
FORM + 0x00cb: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM + 0x00cc: 0xfb8a, # ARABIC LETTER JEH ISOLATED FORM + 0x00cd: 0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM + 0x00ce: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM + 0x00cf: 0xfeb5, # ARABIC LETTER SHEEN ISOLATED FORM + 0x00d0: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM + 0x00d1: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM + 0x00d2: 0xfebb, # ARABIC LETTER SAD INITIAL FORM + 0x00d3: 0xfebd, # ARABIC LETTER DAD ISOLATED FORM + 0x00d4: 0xfebf, # ARABIC LETTER DAD INITIAL FORM + 0x00d5: 0xfec1, # ARABIC LETTER TAH ISOLATED FORM + 0x00d6: 0xfec5, # ARABIC LETTER ZAH ISOLATED FORM + 0x00d7: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM + 0x00d8: 0xfeca, # ARABIC LETTER AIN FINAL FORM + 0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM + 0x00da: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM + 0x00db: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM + 0x00dc: 0xfece, # ARABIC LETTER GHAIN FINAL FORM + 0x00dd: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM + 0x00de: 0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM + 0x00df: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM + 0x00e0: 0xfed3, # ARABIC LETTER FEH INITIAL FORM + 0x00e1: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM + 0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM + 0x00e3: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM + 0x00e4: 0xfedb, # ARABIC LETTER KAF INITIAL FORM + 0x00e5: 0xfb92, # ARABIC LETTER GAF ISOLATED FORM + 0x00e6: 0xfb94, # ARABIC LETTER GAF INITIAL FORM + 0x00e7: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM + 0x00e8: 0xfedf, # ARABIC LETTER LAM INITIAL FORM + 0x00e9: 0xfee0, # ARABIC LETTER LAM MEDIAL FORM + 0x00ea: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM + 0x00eb: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM + 0x00ec: 0xfb9e, # ARABIC LETTER NOON GHUNNA ISOLATED FORM + 0x00ed: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM + 0x00ee: 0xfee7, # ARABIC LETTER NOON INITIAL FORM + 0x00ef: 0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + 0x00f0: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM 
+ 0x00f1: 0xfba6, # ARABIC LETTER HEH GOAL ISOLATED FORM + 0x00f2: 0xfba8, # ARABIC LETTER HEH GOAL INITIAL FORM + 0x00f3: 0xfba9, # ARABIC LETTER HEH GOAL MEDIAL FORM + 0x00f4: 0xfbaa, # ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM + 0x00f5: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM + 0x00f6: 0xfe89, # ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM + 0x00f7: 0xfe8a, # ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM + 0x00f8: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + 0x00f9: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM + 0x00fa: 0xfef2, # ARABIC LETTER YEH FINAL FORM + 0x00fb: 0xfef3, # ARABIC LETTER YEH INITIAL FORM + 0x00fc: 0xfbb0, # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM + 0x00fd: 0xfbae, # ARABIC LETTER YEH BARREE ISOLATED FORM + 0x00fe: 0xfe7c, # ARABIC SHADDA ISOLATED FORM + 0x00ff: 0xfe7d, # ARABIC SHADDA MEDIAL FORM }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR 
+ u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> <control> + u'\x81' # 0x0081 -> <control> + u'\x82' # 0x0082 -> <control> + u'\x83' # 0x0083 -> <control> + u'\x84' # 0x0084 -> <control> + u'\x85' # 0x0085 -> <control> + u'\x86' # 0x0086 -> <control> + u'\x87' # 0x0087 -> <control> + u'\x88' # 0x0088 -> <control> + u'\x89' # 0x0089 -> <control> + u'\x8a' # 0x008a -> <control> + u'\x8b' # 0x008b -> <control> + u'\x8c' # 0x008c -> <control> + u'\x8d' # 0x008d -> <control> + u'\x8e' # 0x008e -> <control> + u'\x8f' # 0x008f -> <control> + u'\x90' # 0x0090 -> <control> + u'\x91' # 0x0091 -> <control> + u'\x92' # 0x0092 -> <control> + u'\x93' # 0x0093 -> <control> + u'\x94' # 0x0094 -> <control> + u'\x95' # 0x0095 -> <control> + u'\x96' # 0x0096 -> <control> + u'\x97' # 0x0097 -> <control> + u'\x98' # 0x0098 -> <control> + u'\x99' # 0x0099 -> <control> + u'\x9a' # 0x009a -> <control> + u'\x9b' # 0x009b -> <control> + u'\x9c' # 0x009c -> <control> + u'\x9d' # 0x009d -> <control> + u'\x9e' # 0x009e -> <control> + u'\x9f' # 0x009f -> <control> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u06f0' # 0x00a1 -> EXTENDED ARABIC-INDIC DIGIT ZERO + u'\u06f1' # 0x00a2 -> EXTENDED ARABIC-INDIC DIGIT ONE + u'\u06f2' # 0x00a3 -> EXTENDED ARABIC-INDIC DIGIT TWO + u'\u06f3' # 0x00a4 -> EXTENDED ARABIC-INDIC DIGIT THREE + u'\u06f4' # 0x00a5 -> EXTENDED ARABIC-INDIC DIGIT FOUR + u'\u06f5' # 0x00a6 -> EXTENDED ARABIC-INDIC DIGIT FIVE + u'\u06f6' # 0x00a7 -> 
EXTENDED ARABIC-INDIC DIGIT SIX + u'\u06f7' # 0x00a8 -> EXTENDED ARABIC-INDIC DIGIT SEVEN + u'\u06f8' # 0x00a9 -> EXTENDED ARABIC-INDIC DIGIT EIGHT + u'\u06f9' # 0x00aa -> EXTENDED ARABIC-INDIC DIGIT NINE + u'\u060c' # 0x00ab -> ARABIC COMMA + u'\u061b' # 0x00ac -> ARABIC SEMICOLON + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\u061f' # 0x00ae -> ARABIC QUESTION MARK + u'\ufe81' # 0x00af -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + u'\ufe8d' # 0x00b0 -> ARABIC LETTER ALEF ISOLATED FORM + u'\ufe8e' # 0x00b1 -> ARABIC LETTER ALEF FINAL FORM + u'\ufe8e' # 0x00b2 -> ARABIC LETTER ALEF FINAL FORM + u'\ufe8f' # 0x00b3 -> ARABIC LETTER BEH ISOLATED FORM + u'\ufe91' # 0x00b4 -> ARABIC LETTER BEH INITIAL FORM + u'\ufb56' # 0x00b5 -> ARABIC LETTER PEH ISOLATED FORM + u'\ufb58' # 0x00b6 -> ARABIC LETTER PEH INITIAL FORM + u'\ufe93' # 0x00b7 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM + u'\ufe95' # 0x00b8 -> ARABIC LETTER TEH ISOLATED FORM + u'\ufe97' # 0x00b9 -> ARABIC LETTER TEH INITIAL FORM + u'\ufb66' # 0x00ba -> ARABIC LETTER TTEH ISOLATED FORM + u'\ufb68' # 0x00bb -> ARABIC LETTER TTEH INITIAL FORM + u'\ufe99' # 0x00bc -> ARABIC LETTER THEH ISOLATED FORM + u'\ufe9b' # 0x00bd -> ARABIC LETTER THEH INITIAL FORM + u'\ufe9d' # 0x00be -> ARABIC LETTER JEEM ISOLATED FORM + u'\ufe9f' # 0x00bf -> ARABIC LETTER JEEM INITIAL FORM + u'\ufb7a' # 0x00c0 -> ARABIC LETTER TCHEH ISOLATED FORM + u'\ufb7c' # 0x00c1 -> ARABIC LETTER TCHEH INITIAL FORM + u'\ufea1' # 0x00c2 -> ARABIC LETTER HAH ISOLATED FORM + u'\ufea3' # 0x00c3 -> ARABIC LETTER HAH INITIAL FORM + u'\ufea5' # 0x00c4 -> ARABIC LETTER KHAH ISOLATED FORM + u'\ufea7' # 0x00c5 -> ARABIC LETTER KHAH INITIAL FORM + u'\ufea9' # 0x00c6 -> ARABIC LETTER DAL ISOLATED FORM + u'\ufb84' # 0x00c7 -> ARABIC LETTER DAHAL ISOLATED FORMN + u'\ufeab' # 0x00c8 -> ARABIC LETTER THAL ISOLATED FORM + u'\ufead' # 0x00c9 -> ARABIC LETTER REH ISOLATED FORM + u'\ufb8c' # 0x00ca -> ARABIC LETTER RREH ISOLATED FORM + u'\ufeaf' # 0x00cb -> ARABIC LETTER 
ZAIN ISOLATED FORM + u'\ufb8a' # 0x00cc -> ARABIC LETTER JEH ISOLATED FORM + u'\ufeb1' # 0x00cd -> ARABIC LETTER SEEN ISOLATED FORM + u'\ufeb3' # 0x00ce -> ARABIC LETTER SEEN INITIAL FORM + u'\ufeb5' # 0x00cf -> ARABIC LETTER SHEEN ISOLATED FORM + u'\ufeb7' # 0x00d0 -> ARABIC LETTER SHEEN INITIAL FORM + u'\ufeb9' # 0x00d1 -> ARABIC LETTER SAD ISOLATED FORM + u'\ufebb' # 0x00d2 -> ARABIC LETTER SAD INITIAL FORM + u'\ufebd' # 0x00d3 -> ARABIC LETTER DAD ISOLATED FORM + u'\ufebf' # 0x00d4 -> ARABIC LETTER DAD INITIAL FORM + u'\ufec1' # 0x00d5 -> ARABIC LETTER TAH ISOLATED FORM + u'\ufec5' # 0x00d6 -> ARABIC LETTER ZAH ISOLATED FORM + u'\ufec9' # 0x00d7 -> ARABIC LETTER AIN ISOLATED FORM + u'\ufeca' # 0x00d8 -> ARABIC LETTER AIN FINAL FORM + u'\ufecb' # 0x00d9 -> ARABIC LETTER AIN INITIAL FORM + u'\ufecc' # 0x00da -> ARABIC LETTER AIN MEDIAL FORM + u'\ufecd' # 0x00db -> ARABIC LETTER GHAIN ISOLATED FORM + u'\ufece' # 0x00dc -> ARABIC LETTER GHAIN FINAL FORM + u'\ufecf' # 0x00dd -> ARABIC LETTER GHAIN INITIAL FORM + u'\ufed0' # 0x00de -> ARABIC LETTER GHAIN MEDIAL FORM + u'\ufed1' # 0x00df -> ARABIC LETTER FEH ISOLATED FORM + u'\ufed3' # 0x00e0 -> ARABIC LETTER FEH INITIAL FORM + u'\ufed5' # 0x00e1 -> ARABIC LETTER QAF ISOLATED FORM + u'\ufed7' # 0x00e2 -> ARABIC LETTER QAF INITIAL FORM + u'\ufed9' # 0x00e3 -> ARABIC LETTER KAF ISOLATED FORM + u'\ufedb' # 0x00e4 -> ARABIC LETTER KAF INITIAL FORM + u'\ufb92' # 0x00e5 -> ARABIC LETTER GAF ISOLATED FORM + u'\ufb94' # 0x00e6 -> ARABIC LETTER GAF INITIAL FORM + u'\ufedd' # 0x00e7 -> ARABIC LETTER LAM ISOLATED FORM + u'\ufedf' # 0x00e8 -> ARABIC LETTER LAM INITIAL FORM + u'\ufee0' # 0x00e9 -> ARABIC LETTER LAM MEDIAL FORM + u'\ufee1' # 0x00ea -> ARABIC LETTER MEEM ISOLATED FORM + u'\ufee3' # 0x00eb -> ARABIC LETTER MEEM INITIAL FORM + u'\ufb9e' # 0x00ec -> ARABIC LETTER NOON GHUNNA ISOLATED FORM + u'\ufee5' # 0x00ed -> ARABIC LETTER NOON ISOLATED FORM + u'\ufee7' # 0x00ee -> ARABIC LETTER NOON INITIAL FORM + u'\ufe85' # 
0x00ef -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + u'\ufeed' # 0x00f0 -> ARABIC LETTER WAW ISOLATED FORM + u'\ufba6' # 0x00f1 -> ARABIC LETTER HEH GOAL ISOLATED FORM + u'\ufba8' # 0x00f2 -> ARABIC LETTER HEH GOAL INITIAL FORM + u'\ufba9' # 0x00f3 -> ARABIC LETTER HEH GOAL MEDIAL FORM + u'\ufbaa' # 0x00f4 -> ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM + u'\ufe80' # 0x00f5 -> ARABIC LETTER HAMZA ISOLATED FORM + u'\ufe89' # 0x00f6 -> ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM + u'\ufe8a' # 0x00f7 -> ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM + u'\ufe8b' # 0x00f8 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + u'\ufef1' # 0x00f9 -> ARABIC LETTER YEH ISOLATED FORM + u'\ufef2' # 0x00fa -> ARABIC LETTER YEH FINAL FORM + u'\ufef3' # 0x00fb -> ARABIC LETTER YEH INITIAL FORM + u'\ufbb0' # 0x00fc -> ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM + u'\ufbae' # 0x00fd -> ARABIC LETTER YEH BARREE ISOLATED FORM + u'\ufe7c' # 0x00fe -> ARABIC SHADDA ISOLATED FORM + u'\ufe7d' # 0x00ff -> ARABIC SHADDA MEDIAL FORM +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # 
CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # 
LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # <control> + 0x0081: 0x0081, # <control> + 0x0082: 0x0082, # <control> + 0x0083: 0x0083, # <control> + 0x0084: 0x0084, # 
<control> + 0x0085: 0x0085, # <control> + 0x0086: 0x0086, # <control> + 0x0087: 0x0087, # <control> + 0x0088: 0x0088, # <control> + 0x0089: 0x0089, # <control> + 0x008a: 0x008a, # <control> + 0x008b: 0x008b, # <control> + 0x008c: 0x008c, # <control> + 0x008d: 0x008d, # <control> + 0x008e: 0x008e, # <control> + 0x008f: 0x008f, # <control> + 0x0090: 0x0090, # <control> + 0x0091: 0x0091, # <control> + 0x0092: 0x0092, # <control> + 0x0093: 0x0093, # <control> + 0x0094: 0x0094, # <control> + 0x0095: 0x0095, # <control> + 0x0096: 0x0096, # <control> + 0x0097: 0x0097, # <control> + 0x0098: 0x0098, # <control> + 0x0099: 0x0099, # <control> + 0x009a: 0x009a, # <control> + 0x009b: 0x009b, # <control> + 0x009c: 0x009c, # <control> + 0x009d: 0x009d, # <control> + 0x009e: 0x009e, # <control> + 0x009f: 0x009f, # <control> + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x060c: 0x00ab, # ARABIC COMMA + 0x061b: 0x00ac, # ARABIC SEMICOLON + 0x061f: 0x00ae, # ARABIC QUESTION MARK + 0x06f0: 0x00a1, # EXTENDED ARABIC-INDIC DIGIT ZERO + 0x06f1: 0x00a2, # EXTENDED ARABIC-INDIC DIGIT ONE + 0x06f2: 0x00a3, # EXTENDED ARABIC-INDIC DIGIT TWO + 0x06f3: 0x00a4, # EXTENDED ARABIC-INDIC DIGIT THREE + 0x06f4: 0x00a5, # EXTENDED ARABIC-INDIC DIGIT FOUR + 0x06f5: 0x00a6, # EXTENDED ARABIC-INDIC DIGIT FIVE + 0x06f6: 0x00a7, # EXTENDED ARABIC-INDIC DIGIT SIX + 0x06f7: 0x00a8, # EXTENDED ARABIC-INDIC DIGIT SEVEN + 0x06f8: 0x00a9, # EXTENDED ARABIC-INDIC DIGIT EIGHT + 0x06f9: 0x00aa, # EXTENDED ARABIC-INDIC DIGIT NINE + 0xfb56: 0x00b5, # ARABIC LETTER PEH ISOLATED FORM + 0xfb58: 0x00b6, # ARABIC LETTER PEH INITIAL FORM + 0xfb66: 0x00ba, # ARABIC LETTER TTEH ISOLATED FORM + 0xfb68: 0x00bb, # ARABIC LETTER TTEH INITIAL FORM + 0xfb7a: 0x00c0, # ARABIC LETTER TCHEH ISOLATED FORM + 0xfb7c: 0x00c1, # ARABIC LETTER TCHEH INITIAL FORM + 0xfb84: 0x00c7, # ARABIC LETTER DAHAL ISOLATED FORMN + 0xfb8a: 0x00cc, # ARABIC LETTER JEH ISOLATED FORM + 0xfb8c: 0x00ca, # ARABIC LETTER RREH ISOLATED 
FORM + 0xfb92: 0x00e5, # ARABIC LETTER GAF ISOLATED FORM + 0xfb94: 0x00e6, # ARABIC LETTER GAF INITIAL FORM + 0xfb9e: 0x00ec, # ARABIC LETTER NOON GHUNNA ISOLATED FORM + 0xfba6: 0x00f1, # ARABIC LETTER HEH GOAL ISOLATED FORM + 0xfba8: 0x00f2, # ARABIC LETTER HEH GOAL INITIAL FORM + 0xfba9: 0x00f3, # ARABIC LETTER HEH GOAL MEDIAL FORM + 0xfbaa: 0x00f4, # ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM + 0xfbae: 0x00fd, # ARABIC LETTER YEH BARREE ISOLATED FORM + 0xfbb0: 0x00fc, # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM + 0xfe7c: 0x00fe, # ARABIC SHADDA ISOLATED FORM + 0xfe7d: 0x00ff, # ARABIC SHADDA MEDIAL FORM + 0xfe80: 0x00f5, # ARABIC LETTER HAMZA ISOLATED FORM + 0xfe81: 0x00af, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + 0xfe85: 0x00ef, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + 0xfe89: 0x00f6, # ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM + 0xfe8a: 0x00f7, # ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM + 0xfe8b: 0x00f8, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + 0xfe8d: 0x00b0, # ARABIC LETTER ALEF ISOLATED FORM + 0xfe8e: None, # ARABIC LETTER ALEF FINAL FORM + 0xfe8f: 0x00b3, # ARABIC LETTER BEH ISOLATED FORM + 0xfe91: 0x00b4, # ARABIC LETTER BEH INITIAL FORM + 0xfe93: 0x00b7, # ARABIC LETTER TEH MARBUTA ISOLATED FORM + 0xfe95: 0x00b8, # ARABIC LETTER TEH ISOLATED FORM + 0xfe97: 0x00b9, # ARABIC LETTER TEH INITIAL FORM + 0xfe99: 0x00bc, # ARABIC LETTER THEH ISOLATED FORM + 0xfe9b: 0x00bd, # ARABIC LETTER THEH INITIAL FORM + 0xfe9d: 0x00be, # ARABIC LETTER JEEM ISOLATED FORM + 0xfe9f: 0x00bf, # ARABIC LETTER JEEM INITIAL FORM + 0xfea1: 0x00c2, # ARABIC LETTER HAH ISOLATED FORM + 0xfea3: 0x00c3, # ARABIC LETTER HAH INITIAL FORM + 0xfea5: 0x00c4, # ARABIC LETTER KHAH ISOLATED FORM + 0xfea7: 0x00c5, # ARABIC LETTER KHAH INITIAL FORM + 0xfea9: 0x00c6, # ARABIC LETTER DAL ISOLATED FORM + 0xfeab: 0x00c8, # ARABIC LETTER THAL ISOLATED FORM + 0xfead: 0x00c9, # ARABIC LETTER REH ISOLATED FORM + 0xfeaf: 0x00cb, # ARABIC 
LETTER ZAIN ISOLATED FORM + 0xfeb1: 0x00cd, # ARABIC LETTER SEEN ISOLATED FORM + 0xfeb3: 0x00ce, # ARABIC LETTER SEEN INITIAL FORM + 0xfeb5: 0x00cf, # ARABIC LETTER SHEEN ISOLATED FORM + 0xfeb7: 0x00d0, # ARABIC LETTER SHEEN INITIAL FORM + 0xfeb9: 0x00d1, # ARABIC LETTER SAD ISOLATED FORM + 0xfebb: 0x00d2, # ARABIC LETTER SAD INITIAL FORM + 0xfebd: 0x00d3, # ARABIC LETTER DAD ISOLATED FORM + 0xfebf: 0x00d4, # ARABIC LETTER DAD INITIAL FORM + 0xfec1: 0x00d5, # ARABIC LETTER TAH ISOLATED FORM + 0xfec5: 0x00d6, # ARABIC LETTER ZAH ISOLATED FORM + 0xfec9: 0x00d7, # ARABIC LETTER AIN ISOLATED FORM + 0xfeca: 0x00d8, # ARABIC LETTER AIN FINAL FORM + 0xfecb: 0x00d9, # ARABIC LETTER AIN INITIAL FORM + 0xfecc: 0x00da, # ARABIC LETTER AIN MEDIAL FORM + 0xfecd: 0x00db, # ARABIC LETTER GHAIN ISOLATED FORM + 0xfece: 0x00dc, # ARABIC LETTER GHAIN FINAL FORM + 0xfecf: 0x00dd, # ARABIC LETTER GHAIN INITIAL FORM + 0xfed0: 0x00de, # ARABIC LETTER GHAIN MEDIAL FORM + 0xfed1: 0x00df, # ARABIC LETTER FEH ISOLATED FORM + 0xfed3: 0x00e0, # ARABIC LETTER FEH INITIAL FORM + 0xfed5: 0x00e1, # ARABIC LETTER QAF ISOLATED FORM + 0xfed7: 0x00e2, # ARABIC LETTER QAF INITIAL FORM + 0xfed9: 0x00e3, # ARABIC LETTER KAF ISOLATED FORM + 0xfedb: 0x00e4, # ARABIC LETTER KAF INITIAL FORM + 0xfedd: 0x00e7, # ARABIC LETTER LAM ISOLATED FORM + 0xfedf: 0x00e8, # ARABIC LETTER LAM INITIAL FORM + 0xfee0: 0x00e9, # ARABIC LETTER LAM MEDIAL FORM + 0xfee1: 0x00ea, # ARABIC LETTER MEEM ISOLATED FORM + 0xfee3: 0x00eb, # ARABIC LETTER MEEM INITIAL FORM + 0xfee5: 0x00ed, # ARABIC LETTER NOON ISOLATED FORM + 0xfee7: 0x00ee, # ARABIC LETTER NOON INITIAL FORM + 0xfeed: 0x00f0, # ARABIC LETTER WAW ISOLATED FORM + 0xfef1: 0x00f9, # ARABIC LETTER YEH ISOLATED FORM + 0xfef2: 0x00fa, # ARABIC LETTER YEH FINAL FORM + 0xfef3: 0x00fb, # ARABIC LETTER YEH INITIAL FORM +}
\ No newline at end of file diff --git a/Lib/encodings/cp1250.py b/Lib/encodings/cp1250.py index 85774ed..a227246 100644 --- a/Lib/encodings/cp1250.py +++ b/Lib/encodings/cp1250.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP1250.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1250.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,87 +32,600 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: None, # UNDEFINED - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: None, # UNDEFINED - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: None, # UNDEFINED - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x008d: 0x0164, # LATIN CAPITAL LETTER T WITH CARON - 0x008e: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x008f: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x0090: None, # UNDEFINED - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: None, # UNDEFINED - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: 0x0161, # LATIN SMALL LETTER S WITH 
CARON - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x009d: 0x0165, # LATIN SMALL LETTER T WITH CARON - 0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x009f: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00a1: 0x02c7, # CARON - 0x00a2: 0x02d8, # BREVE - 0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x00a5: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00b2: 0x02db, # OGONEK - 0x00b3: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x00b9: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA - 0x00bc: 0x013d, # LATIN CAPITAL LETTER L WITH CARON - 0x00bd: 0x02dd, # DOUBLE ACUTE ACCENT - 0x00be: 0x013e, # LATIN SMALL LETTER L WITH CARON - 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00c0: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE - 0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE - 0x00c5: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE - 0x00c6: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00cc: 0x011a, # LATIN CAPITAL LETTER E WITH CARON - 0x00cf: 0x010e, # LATIN CAPITAL LETTER D WITH CARON - 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE - 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00d2: 0x0147, # LATIN CAPITAL LETTER N WITH CARON - 0x00d5: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x00d8: 0x0158, # LATIN CAPITAL LETTER R WITH CARON - 0x00d9: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x00db: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x00de: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA - 0x00e0: 0x0155, # LATIN SMALL LETTER R WITH ACUTE - 0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE - 0x00e5: 0x013a, # LATIN SMALL LETTER L WITH ACUTE - 0x00e6: 
0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00ec: 0x011b, # LATIN SMALL LETTER E WITH CARON - 0x00ef: 0x010f, # LATIN SMALL LETTER D WITH CARON - 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE - 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00f2: 0x0148, # LATIN SMALL LETTER N WITH CARON - 0x00f5: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x00f8: 0x0159, # LATIN SMALL LETTER R WITH CARON - 0x00f9: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE - 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA - 0x00ff: 0x02d9, # DOT ABOVE + 0x0080: 0x20ac, # EURO SIGN + 0x0081: None, # UNDEFINED + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: None, # UNDEFINED + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: None, # UNDEFINED + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x008d: 0x0164, # LATIN CAPITAL LETTER T WITH CARON + 0x008e: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x008f: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x0090: None, # UNDEFINED + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: None, # UNDEFINED + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x009d: 0x0165, # LATIN SMALL LETTER T WITH CARON + 0x009e: 0x017e, 
# LATIN SMALL LETTER Z WITH CARON + 0x009f: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x00a1: 0x02c7, # CARON + 0x00a2: 0x02d8, # BREVE + 0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE + 0x00a5: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00b2: 0x02db, # OGONEK + 0x00b3: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x00b9: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00bc: 0x013d, # LATIN CAPITAL LETTER L WITH CARON + 0x00bd: 0x02dd, # DOUBLE ACUTE ACCENT + 0x00be: 0x013e, # LATIN SMALL LETTER L WITH CARON + 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00c0: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE + 0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE + 0x00c5: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE + 0x00c6: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00cc: 0x011a, # LATIN CAPITAL LETTER E WITH CARON + 0x00cf: 0x010e, # LATIN CAPITAL LETTER D WITH CARON + 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE + 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00d2: 0x0147, # LATIN CAPITAL LETTER N WITH CARON + 0x00d5: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x00d8: 0x0158, # LATIN CAPITAL LETTER R WITH CARON + 0x00d9: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x00db: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x00de: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA + 0x00e0: 0x0155, # LATIN SMALL LETTER R WITH ACUTE + 0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE + 0x00e5: 0x013a, # LATIN SMALL LETTER L WITH ACUTE + 0x00e6: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00ec: 0x011b, # LATIN SMALL LETTER E WITH 
CARON + 0x00ef: 0x010f, # LATIN SMALL LETTER D WITH CARON + 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE + 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00f2: 0x0148, # LATIN SMALL LETTER N WITH CARON + 0x00f5: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x00f8: 0x0159, # LATIN SMALL LETTER R WITH CARON + 0x00f9: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE + 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA + 0x00ff: 0x02d9, # DOT ABOVE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' 
# 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 
0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u20ac' # 0x0080 -> EURO SIGN + u'\ufffe' # 0x0081 -> UNDEFINED + u'\u201a' # 0x0082 -> SINGLE LOW-9 QUOTATION MARK + u'\ufffe' # 0x0083 -> UNDEFINED + u'\u201e' # 0x0084 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x0085 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x0086 -> DAGGER + u'\u2021' # 0x0087 -> DOUBLE DAGGER + u'\ufffe' # 0x0088 -> UNDEFINED + u'\u2030' # 0x0089 -> PER MILLE SIGN + u'\u0160' # 0x008a -> LATIN CAPITAL LETTER S WITH CARON + u'\u2039' # 0x008b -> SINGLE LEFT-POINTING ANGLE 
QUOTATION MARK + u'\u015a' # 0x008c -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u0164' # 0x008d -> LATIN CAPITAL LETTER T WITH CARON + u'\u017d' # 0x008e -> LATIN CAPITAL LETTER Z WITH CARON + u'\u0179' # 0x008f -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\ufffe' # 0x0090 -> UNDEFINED + u'\u2018' # 0x0091 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x0092 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x0093 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x0094 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x0095 -> BULLET + u'\u2013' # 0x0096 -> EN DASH + u'\u2014' # 0x0097 -> EM DASH + u'\ufffe' # 0x0098 -> UNDEFINED + u'\u2122' # 0x0099 -> TRADE MARK SIGN + u'\u0161' # 0x009a -> LATIN SMALL LETTER S WITH CARON + u'\u203a' # 0x009b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u015b' # 0x009c -> LATIN SMALL LETTER S WITH ACUTE + u'\u0165' # 0x009d -> LATIN SMALL LETTER T WITH CARON + u'\u017e' # 0x009e -> LATIN SMALL LETTER Z WITH CARON + u'\u017a' # 0x009f -> LATIN SMALL LETTER Z WITH ACUTE + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u02c7' # 0x00a1 -> CARON + u'\u02d8' # 0x00a2 -> BREVE + u'\u0141' # 0x00a3 -> LATIN CAPITAL LETTER L WITH STROKE + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\u0104' # 0x00a5 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u015e' # 0x00aa -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\u017b' # 0x00af -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\u02db' # 0x00b2 -> OGONEK + u'\u0142' # 0x00b3 -> LATIN SMALL LETTER L WITH STROKE + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\xb8' # 0x00b8 -> 
CEDILLA + u'\u0105' # 0x00b9 -> LATIN SMALL LETTER A WITH OGONEK + u'\u015f' # 0x00ba -> LATIN SMALL LETTER S WITH CEDILLA + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u013d' # 0x00bc -> LATIN CAPITAL LETTER L WITH CARON + u'\u02dd' # 0x00bd -> DOUBLE ACUTE ACCENT + u'\u013e' # 0x00be -> LATIN SMALL LETTER L WITH CARON + u'\u017c' # 0x00bf -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u0154' # 0x00c0 -> LATIN CAPITAL LETTER R WITH ACUTE + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u0102' # 0x00c3 -> LATIN CAPITAL LETTER A WITH BREVE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u0139' # 0x00c5 -> LATIN CAPITAL LETTER L WITH ACUTE + u'\u0106' # 0x00c6 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc7' # 0x00c7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\u010c' # 0x00c8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0118' # 0x00ca -> LATIN CAPITAL LETTER E WITH OGONEK + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u011a' # 0x00cc -> LATIN CAPITAL LETTER E WITH CARON + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\u010e' # 0x00cf -> LATIN CAPITAL LETTER D WITH CARON + u'\u0110' # 0x00d0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u0143' # 0x00d1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\u0147' # 0x00d2 -> LATIN CAPITAL LETTER N WITH CARON + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0150' # 0x00d5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\u0158' # 0x00d8 -> LATIN CAPITAL LETTER R WITH CARON + u'\u016e' # 0x00d9 -> LATIN CAPITAL LETTER U WITH RING ABOVE + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\u0170' # 0x00db -> 
LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0x00dd -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\u0162' # 0x00de -> LATIN CAPITAL LETTER T WITH CEDILLA + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S + u'\u0155' # 0x00e0 -> LATIN SMALL LETTER R WITH ACUTE + u'\xe1' # 0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\u0103' # 0x00e3 -> LATIN SMALL LETTER A WITH BREVE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u013a' # 0x00e5 -> LATIN SMALL LETTER L WITH ACUTE + u'\u0107' # 0x00e6 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe7' # 0x00e7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\u010d' # 0x00e8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u0119' # 0x00ea -> LATIN SMALL LETTER E WITH OGONEK + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u011b' # 0x00ec -> LATIN SMALL LETTER E WITH CARON + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u010f' # 0x00ef -> LATIN SMALL LETTER D WITH CARON + u'\u0111' # 0x00f0 -> LATIN SMALL LETTER D WITH STROKE + u'\u0144' # 0x00f1 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0148' # 0x00f2 -> LATIN SMALL LETTER N WITH CARON + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u0151' # 0x00f5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x00f7 -> DIVISION SIGN + u'\u0159' # 0x00f8 -> LATIN SMALL LETTER R WITH CARON + u'\u016f' # 0x00f9 -> LATIN SMALL LETTER U WITH RING ABOVE + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\u0171' # 0x00fb -> LATIN SMALL LETTER U WITH DOUBLE ACUTE + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0x00fd -> LATIN SMALL LETTER Y WITH ACUTE + u'\u0163' # 0x00fe -> LATIN SMALL LETTER T 
WITH CEDILLA + u'\u02d9' # 0x00ff -> DOT ABOVE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 
0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 
0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b8: 0x00b8, # CEDILLA + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c7: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O 
WITH ACUTE + 0x00d4: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e7: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00fa: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x0102: 0x00c3, # LATIN CAPITAL LETTER A WITH BREVE + 0x0103: 0x00e3, # LATIN SMALL LETTER A WITH BREVE + 0x0104: 0x00a5, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0x00b9, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0x00c6, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0x00e6, # LATIN SMALL LETTER C WITH ACUTE + 0x010c: 0x00c8, # LATIN CAPITAL LETTER C WITH CARON + 0x010d: 0x00e8, # LATIN SMALL LETTER C WITH CARON + 0x010e: 0x00cf, # LATIN CAPITAL LETTER D WITH CARON + 0x010f: 0x00ef, # LATIN SMALL LETTER D WITH CARON + 0x0110: 0x00d0, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0x00f0, # LATIN SMALL LETTER D WITH STROKE + 0x0118: 0x00ca, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0x00ea, # LATIN SMALL LETTER E WITH OGONEK + 0x011a: 0x00cc, # LATIN CAPITAL LETTER E WITH CARON + 0x011b: 0x00ec, # 
LATIN SMALL LETTER E WITH CARON + 0x0139: 0x00c5, # LATIN CAPITAL LETTER L WITH ACUTE + 0x013a: 0x00e5, # LATIN SMALL LETTER L WITH ACUTE + 0x013d: 0x00bc, # LATIN CAPITAL LETTER L WITH CARON + 0x013e: 0x00be, # LATIN SMALL LETTER L WITH CARON + 0x0141: 0x00a3, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0x00b3, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0x00d1, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0x00f1, # LATIN SMALL LETTER N WITH ACUTE + 0x0147: 0x00d2, # LATIN CAPITAL LETTER N WITH CARON + 0x0148: 0x00f2, # LATIN SMALL LETTER N WITH CARON + 0x0150: 0x00d5, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x0151: 0x00f5, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x0154: 0x00c0, # LATIN CAPITAL LETTER R WITH ACUTE + 0x0155: 0x00e0, # LATIN SMALL LETTER R WITH ACUTE + 0x0158: 0x00d8, # LATIN CAPITAL LETTER R WITH CARON + 0x0159: 0x00f8, # LATIN SMALL LETTER R WITH CARON + 0x015a: 0x008c, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015b: 0x009c, # LATIN SMALL LETTER S WITH ACUTE + 0x015e: 0x00aa, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015f: 0x00ba, # LATIN SMALL LETTER S WITH CEDILLA + 0x0160: 0x008a, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x009a, # LATIN SMALL LETTER S WITH CARON + 0x0162: 0x00de, # LATIN CAPITAL LETTER T WITH CEDILLA + 0x0163: 0x00fe, # LATIN SMALL LETTER T WITH CEDILLA + 0x0164: 0x008d, # LATIN CAPITAL LETTER T WITH CARON + 0x0165: 0x009d, # LATIN SMALL LETTER T WITH CARON + 0x016e: 0x00d9, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x016f: 0x00f9, # LATIN SMALL LETTER U WITH RING ABOVE + 0x0170: 0x00db, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x0171: 0x00fb, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x0179: 0x008f, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017a: 0x009f, # LATIN SMALL LETTER Z WITH ACUTE + 0x017b: 0x00af, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017c: 0x00bf, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017d: 0x008e, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x009e, # LATIN SMALL LETTER Z WITH CARON + 
0x02c7: 0x00a1, # CARON + 0x02d8: 0x00a2, # BREVE + 0x02d9: 0x00ff, # DOT ABOVE + 0x02db: 0x00b2, # OGONEK + 0x02dd: 0x00bd, # DOUBLE ACUTE ACCENT + 0x2013: 0x0096, # EN DASH + 0x2014: 0x0097, # EM DASH + 0x2018: 0x0091, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x0092, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x0082, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x0093, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x0094, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x0084, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x0086, # DAGGER + 0x2021: 0x0087, # DOUBLE DAGGER + 0x2022: 0x0095, # BULLET + 0x2026: 0x0085, # HORIZONTAL ELLIPSIS + 0x2030: 0x0089, # PER MILLE SIGN + 0x2039: 0x008b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x009b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20ac: 0x0080, # EURO SIGN + 0x2122: 0x0099, # TRADE MARK SIGN +}
\ No newline at end of file diff --git a/Lib/encodings/cp1251.py b/Lib/encodings/cp1251.py index f191b06..6c4abda 100644 --- a/Lib/encodings/cp1251.py +++ b/Lib/encodings/cp1251.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP1251.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1251.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,121 +32,638 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x0402, # CYRILLIC CAPITAL LETTER DJE - 0x0081: 0x0403, # CYRILLIC CAPITAL LETTER GJE - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: 0x0453, # CYRILLIC SMALL LETTER GJE - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: 0x20ac, # EURO SIGN - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: 0x0409, # CYRILLIC CAPITAL LETTER LJE - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: 0x040a, # CYRILLIC CAPITAL LETTER NJE - 0x008d: 0x040c, # CYRILLIC CAPITAL LETTER KJE - 0x008e: 0x040b, # CYRILLIC CAPITAL LETTER TSHE - 0x008f: 0x040f, # CYRILLIC CAPITAL LETTER DZHE - 0x0090: 0x0452, # CYRILLIC SMALL LETTER DJE - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: None, # UNDEFINED - 0x0099: 0x2122, # TRADE MARK 
SIGN - 0x009a: 0x0459, # CYRILLIC SMALL LETTER LJE - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: 0x045a, # CYRILLIC SMALL LETTER NJE - 0x009d: 0x045c, # CYRILLIC SMALL LETTER KJE - 0x009e: 0x045b, # CYRILLIC SMALL LETTER TSHE - 0x009f: 0x045f, # CYRILLIC SMALL LETTER DZHE - 0x00a1: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U - 0x00a2: 0x045e, # CYRILLIC SMALL LETTER SHORT U - 0x00a3: 0x0408, # CYRILLIC CAPITAL LETTER JE - 0x00a5: 0x0490, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN - 0x00a8: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x00aa: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x00af: 0x0407, # CYRILLIC CAPITAL LETTER YI - 0x00b2: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x00b3: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x00b4: 0x0491, # CYRILLIC SMALL LETTER GHE WITH UPTURN - 0x00b8: 0x0451, # CYRILLIC SMALL LETTER IO - 0x00b9: 0x2116, # NUMERO SIGN - 0x00ba: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x00bc: 0x0458, # CYRILLIC SMALL LETTER JE - 0x00bd: 0x0405, # CYRILLIC CAPITAL LETTER DZE - 0x00be: 0x0455, # CYRILLIC SMALL LETTER DZE - 0x00bf: 0x0457, # CYRILLIC SMALL LETTER YI - 0x00c0: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x00c1: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x00c2: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x00c3: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x00c4: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x00c5: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x00c6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x00c7: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x00c8: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x00c9: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x00ca: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x00cb: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x00cc: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x00cd: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x00ce: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x00cf: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x00d0: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x00d1: 0x0421, # 
CYRILLIC CAPITAL LETTER ES - 0x00d2: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x00d3: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x00d4: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x00d5: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x00d6: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x00d7: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x00d8: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x00d9: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x00da: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x00db: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x00dc: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x00dd: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x00de: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x00df: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00e0: 0x0430, # CYRILLIC SMALL LETTER A - 0x00e1: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00e2: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00e3: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00e4: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00e5: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00e6: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00e7: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00e8: 0x0438, # CYRILLIC SMALL LETTER I - 0x00e9: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00ea: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00eb: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00ec: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00ed: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00ee: 0x043e, # CYRILLIC SMALL LETTER O - 0x00ef: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00f0: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00f1: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00f2: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00f3: 0x0443, # CYRILLIC SMALL LETTER U - 0x00f4: 0x0444, # CYRILLIC SMALL LETTER EF - 0x00f5: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00f6: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00f7: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00f8: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00fa: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x00fb: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00fc: 
0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00fd: 0x044d, # CYRILLIC SMALL LETTER E - 0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU - 0x00ff: 0x044f, # CYRILLIC SMALL LETTER YA + 0x0080: 0x0402, # CYRILLIC CAPITAL LETTER DJE + 0x0081: 0x0403, # CYRILLIC CAPITAL LETTER GJE + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: 0x0453, # CYRILLIC SMALL LETTER GJE + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: 0x20ac, # EURO SIGN + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: 0x0409, # CYRILLIC CAPITAL LETTER LJE + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: 0x040a, # CYRILLIC CAPITAL LETTER NJE + 0x008d: 0x040c, # CYRILLIC CAPITAL LETTER KJE + 0x008e: 0x040b, # CYRILLIC CAPITAL LETTER TSHE + 0x008f: 0x040f, # CYRILLIC CAPITAL LETTER DZHE + 0x0090: 0x0452, # CYRILLIC SMALL LETTER DJE + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: None, # UNDEFINED + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: 0x0459, # CYRILLIC SMALL LETTER LJE + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: 0x045a, # CYRILLIC SMALL LETTER NJE + 0x009d: 0x045c, # CYRILLIC SMALL LETTER KJE + 0x009e: 0x045b, # CYRILLIC SMALL LETTER TSHE + 0x009f: 0x045f, # CYRILLIC SMALL LETTER DZHE + 0x00a1: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U + 0x00a2: 0x045e, # CYRILLIC SMALL LETTER SHORT U + 0x00a3: 0x0408, # CYRILLIC CAPITAL LETTER JE + 0x00a5: 0x0490, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN + 0x00a8: 0x0401, # CYRILLIC CAPITAL LETTER IO + 0x00aa: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x00af: 0x0407, # CYRILLIC CAPITAL LETTER YI + 0x00b2: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 
0x00b3: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x00b4: 0x0491, # CYRILLIC SMALL LETTER GHE WITH UPTURN + 0x00b8: 0x0451, # CYRILLIC SMALL LETTER IO + 0x00b9: 0x2116, # NUMERO SIGN + 0x00ba: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x00bc: 0x0458, # CYRILLIC SMALL LETTER JE + 0x00bd: 0x0405, # CYRILLIC CAPITAL LETTER DZE + 0x00be: 0x0455, # CYRILLIC SMALL LETTER DZE + 0x00bf: 0x0457, # CYRILLIC SMALL LETTER YI + 0x00c0: 0x0410, # CYRILLIC CAPITAL LETTER A + 0x00c1: 0x0411, # CYRILLIC CAPITAL LETTER BE + 0x00c2: 0x0412, # CYRILLIC CAPITAL LETTER VE + 0x00c3: 0x0413, # CYRILLIC CAPITAL LETTER GHE + 0x00c4: 0x0414, # CYRILLIC CAPITAL LETTER DE + 0x00c5: 0x0415, # CYRILLIC CAPITAL LETTER IE + 0x00c6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE + 0x00c7: 0x0417, # CYRILLIC CAPITAL LETTER ZE + 0x00c8: 0x0418, # CYRILLIC CAPITAL LETTER I + 0x00c9: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I + 0x00ca: 0x041a, # CYRILLIC CAPITAL LETTER KA + 0x00cb: 0x041b, # CYRILLIC CAPITAL LETTER EL + 0x00cc: 0x041c, # CYRILLIC CAPITAL LETTER EM + 0x00cd: 0x041d, # CYRILLIC CAPITAL LETTER EN + 0x00ce: 0x041e, # CYRILLIC CAPITAL LETTER O + 0x00cf: 0x041f, # CYRILLIC CAPITAL LETTER PE + 0x00d0: 0x0420, # CYRILLIC CAPITAL LETTER ER + 0x00d1: 0x0421, # CYRILLIC CAPITAL LETTER ES + 0x00d2: 0x0422, # CYRILLIC CAPITAL LETTER TE + 0x00d3: 0x0423, # CYRILLIC CAPITAL LETTER U + 0x00d4: 0x0424, # CYRILLIC CAPITAL LETTER EF + 0x00d5: 0x0425, # CYRILLIC CAPITAL LETTER HA + 0x00d6: 0x0426, # CYRILLIC CAPITAL LETTER TSE + 0x00d7: 0x0427, # CYRILLIC CAPITAL LETTER CHE + 0x00d8: 0x0428, # CYRILLIC CAPITAL LETTER SHA + 0x00d9: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA + 0x00da: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x00db: 0x042b, # CYRILLIC CAPITAL LETTER YERU + 0x00dc: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x00dd: 0x042d, # CYRILLIC CAPITAL LETTER E + 0x00de: 0x042e, # CYRILLIC CAPITAL LETTER YU + 0x00df: 0x042f, # CYRILLIC CAPITAL LETTER YA + 0x00e0: 0x0430, # CYRILLIC 
SMALL LETTER A + 0x00e1: 0x0431, # CYRILLIC SMALL LETTER BE + 0x00e2: 0x0432, # CYRILLIC SMALL LETTER VE + 0x00e3: 0x0433, # CYRILLIC SMALL LETTER GHE + 0x00e4: 0x0434, # CYRILLIC SMALL LETTER DE + 0x00e5: 0x0435, # CYRILLIC SMALL LETTER IE + 0x00e6: 0x0436, # CYRILLIC SMALL LETTER ZHE + 0x00e7: 0x0437, # CYRILLIC SMALL LETTER ZE + 0x00e8: 0x0438, # CYRILLIC SMALL LETTER I + 0x00e9: 0x0439, # CYRILLIC SMALL LETTER SHORT I + 0x00ea: 0x043a, # CYRILLIC SMALL LETTER KA + 0x00eb: 0x043b, # CYRILLIC SMALL LETTER EL + 0x00ec: 0x043c, # CYRILLIC SMALL LETTER EM + 0x00ed: 0x043d, # CYRILLIC SMALL LETTER EN + 0x00ee: 0x043e, # CYRILLIC SMALL LETTER O + 0x00ef: 0x043f, # CYRILLIC SMALL LETTER PE + 0x00f0: 0x0440, # CYRILLIC SMALL LETTER ER + 0x00f1: 0x0441, # CYRILLIC SMALL LETTER ES + 0x00f2: 0x0442, # CYRILLIC SMALL LETTER TE + 0x00f3: 0x0443, # CYRILLIC SMALL LETTER U + 0x00f4: 0x0444, # CYRILLIC SMALL LETTER EF + 0x00f5: 0x0445, # CYRILLIC SMALL LETTER HA + 0x00f6: 0x0446, # CYRILLIC SMALL LETTER TSE + 0x00f7: 0x0447, # CYRILLIC SMALL LETTER CHE + 0x00f8: 0x0448, # CYRILLIC SMALL LETTER SHA + 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA + 0x00fa: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN + 0x00fb: 0x044b, # CYRILLIC SMALL LETTER YERU + 0x00fc: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN + 0x00fd: 0x044d, # CYRILLIC SMALL LETTER E + 0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU + 0x00ff: 0x044f, # CYRILLIC SMALL LETTER YA }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + 
u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u0402' # 0x0080 -> CYRILLIC CAPITAL LETTER DJE + u'\u0403' # 0x0081 -> CYRILLIC CAPITAL LETTER GJE + u'\u201a' # 0x0082 -> SINGLE LOW-9 QUOTATION MARK + u'\u0453' # 0x0083 -> CYRILLIC SMALL LETTER GJE + u'\u201e' # 0x0084 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x0085 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x0086 -> DAGGER + u'\u2021' # 0x0087 -> DOUBLE DAGGER + u'\u20ac' # 0x0088 -> EURO SIGN + u'\u2030' # 0x0089 -> PER MILLE SIGN + u'\u0409' # 0x008a -> CYRILLIC CAPITAL LETTER LJE + u'\u2039' # 0x008b -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u040a' # 0x008c -> CYRILLIC CAPITAL LETTER NJE + u'\u040c' # 0x008d -> CYRILLIC CAPITAL LETTER KJE + u'\u040b' # 0x008e -> CYRILLIC CAPITAL LETTER TSHE + u'\u040f' # 0x008f -> CYRILLIC CAPITAL LETTER DZHE + u'\u0452' # 0x0090 -> CYRILLIC SMALL LETTER DJE + u'\u2018' # 0x0091 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x0092 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x0093 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x0094 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x0095 -> BULLET + u'\u2013' # 0x0096 -> EN DASH + u'\u2014' # 0x0097 -> EM DASH + u'\ufffe' # 0x0098 -> UNDEFINED + u'\u2122' # 0x0099 -> TRADE MARK SIGN + u'\u0459' # 0x009a -> CYRILLIC SMALL LETTER LJE + u'\u203a' # 0x009b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u045a' # 0x009c -> CYRILLIC SMALL LETTER NJE + u'\u045c' # 0x009d -> CYRILLIC SMALL LETTER KJE + u'\u045b' 
# 0x009e -> CYRILLIC SMALL LETTER TSHE + u'\u045f' # 0x009f -> CYRILLIC SMALL LETTER DZHE + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u040e' # 0x00a1 -> CYRILLIC CAPITAL LETTER SHORT U + u'\u045e' # 0x00a2 -> CYRILLIC SMALL LETTER SHORT U + u'\u0408' # 0x00a3 -> CYRILLIC CAPITAL LETTER JE + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\u0490' # 0x00a5 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\u0401' # 0x00a8 -> CYRILLIC CAPITAL LETTER IO + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u0404' # 0x00aa -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\u0407' # 0x00af -> CYRILLIC CAPITAL LETTER YI + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\u0406' # 0x00b2 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0456' # 0x00b3 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0491' # 0x00b4 -> CYRILLIC SMALL LETTER GHE WITH UPTURN + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\u0451' # 0x00b8 -> CYRILLIC SMALL LETTER IO + u'\u2116' # 0x00b9 -> NUMERO SIGN + u'\u0454' # 0x00ba -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u0458' # 0x00bc -> CYRILLIC SMALL LETTER JE + u'\u0405' # 0x00bd -> CYRILLIC CAPITAL LETTER DZE + u'\u0455' # 0x00be -> CYRILLIC SMALL LETTER DZE + u'\u0457' # 0x00bf -> CYRILLIC SMALL LETTER YI + u'\u0410' # 0x00c0 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0x00c1 -> CYRILLIC CAPITAL LETTER BE + u'\u0412' # 0x00c2 -> CYRILLIC CAPITAL LETTER VE + u'\u0413' # 0x00c3 -> CYRILLIC CAPITAL LETTER GHE + u'\u0414' # 0x00c4 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0x00c5 -> CYRILLIC CAPITAL LETTER IE + u'\u0416' # 0x00c6 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0417' # 0x00c7 -> 
CYRILLIC CAPITAL LETTER ZE + u'\u0418' # 0x00c8 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0x00c9 -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0x00ca -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0x00cb -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0x00cc -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0x00cd -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0x00ce -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0x00cf -> CYRILLIC CAPITAL LETTER PE + u'\u0420' # 0x00d0 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0x00d1 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0x00d2 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0x00d3 -> CYRILLIC CAPITAL LETTER U + u'\u0424' # 0x00d4 -> CYRILLIC CAPITAL LETTER EF + u'\u0425' # 0x00d5 -> CYRILLIC CAPITAL LETTER HA + u'\u0426' # 0x00d6 -> CYRILLIC CAPITAL LETTER TSE + u'\u0427' # 0x00d7 -> CYRILLIC CAPITAL LETTER CHE + u'\u0428' # 0x00d8 -> CYRILLIC CAPITAL LETTER SHA + u'\u0429' # 0x00d9 -> CYRILLIC CAPITAL LETTER SHCHA + u'\u042a' # 0x00da -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\u042b' # 0x00db -> CYRILLIC CAPITAL LETTER YERU + u'\u042c' # 0x00dc -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042d' # 0x00dd -> CYRILLIC CAPITAL LETTER E + u'\u042e' # 0x00de -> CYRILLIC CAPITAL LETTER YU + u'\u042f' # 0x00df -> CYRILLIC CAPITAL LETTER YA + u'\u0430' # 0x00e0 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0x00e1 -> CYRILLIC SMALL LETTER BE + u'\u0432' # 0x00e2 -> CYRILLIC SMALL LETTER VE + u'\u0433' # 0x00e3 -> CYRILLIC SMALL LETTER GHE + u'\u0434' # 0x00e4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0x00e5 -> CYRILLIC SMALL LETTER IE + u'\u0436' # 0x00e6 -> CYRILLIC SMALL LETTER ZHE + u'\u0437' # 0x00e7 -> CYRILLIC SMALL LETTER ZE + u'\u0438' # 0x00e8 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0x00e9 -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0x00ea -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0x00eb -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0x00ec -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0x00ed -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0x00ee -> CYRILLIC 
SMALL LETTER O + u'\u043f' # 0x00ef -> CYRILLIC SMALL LETTER PE + u'\u0440' # 0x00f0 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0x00f1 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0x00f2 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0x00f3 -> CYRILLIC SMALL LETTER U + u'\u0444' # 0x00f4 -> CYRILLIC SMALL LETTER EF + u'\u0445' # 0x00f5 -> CYRILLIC SMALL LETTER HA + u'\u0446' # 0x00f6 -> CYRILLIC SMALL LETTER TSE + u'\u0447' # 0x00f7 -> CYRILLIC SMALL LETTER CHE + u'\u0448' # 0x00f8 -> CYRILLIC SMALL LETTER SHA + u'\u0449' # 0x00f9 -> CYRILLIC SMALL LETTER SHCHA + u'\u044a' # 0x00fa -> CYRILLIC SMALL LETTER HARD SIGN + u'\u044b' # 0x00fb -> CYRILLIC SMALL LETTER YERU + u'\u044c' # 0x00fc -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044d' # 0x00fd -> CYRILLIC SMALL LETTER E + u'\u044e' # 0x00fe -> CYRILLIC SMALL LETTER YU + u'\u044f' # 0x00ff -> CYRILLIC SMALL LETTER YA +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 
0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # 
LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # 
REGISTERED SIGN + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x0401: 0x00a8, # CYRILLIC CAPITAL LETTER IO + 0x0402: 0x0080, # CYRILLIC CAPITAL LETTER DJE + 0x0403: 0x0081, # CYRILLIC CAPITAL LETTER GJE + 0x0404: 0x00aa, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0405: 0x00bd, # CYRILLIC CAPITAL LETTER DZE + 0x0406: 0x00b2, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0407: 0x00af, # CYRILLIC CAPITAL LETTER YI + 0x0408: 0x00a3, # CYRILLIC CAPITAL LETTER JE + 0x0409: 0x008a, # CYRILLIC CAPITAL LETTER LJE + 0x040a: 0x008c, # CYRILLIC CAPITAL LETTER NJE + 0x040b: 0x008e, # CYRILLIC CAPITAL LETTER TSHE + 0x040c: 0x008d, # CYRILLIC CAPITAL LETTER KJE + 0x040e: 0x00a1, # CYRILLIC CAPITAL LETTER SHORT U + 0x040f: 0x008f, # CYRILLIC CAPITAL LETTER DZHE + 0x0410: 0x00c0, # CYRILLIC CAPITAL LETTER A + 0x0411: 0x00c1, # CYRILLIC CAPITAL LETTER BE + 0x0412: 0x00c2, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0x00c3, # CYRILLIC CAPITAL LETTER GHE + 0x0414: 0x00c4, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0x00c5, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0x00c6, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0x00c7, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0x00c8, # CYRILLIC CAPITAL LETTER I + 0x0419: 0x00c9, # CYRILLIC CAPITAL LETTER SHORT I + 0x041a: 0x00ca, # CYRILLIC CAPITAL LETTER KA + 0x041b: 0x00cb, # CYRILLIC CAPITAL LETTER EL + 0x041c: 0x00cc, # CYRILLIC CAPITAL LETTER EM + 0x041d: 0x00cd, # CYRILLIC CAPITAL LETTER EN + 0x041e: 0x00ce, # CYRILLIC CAPITAL LETTER O + 0x041f: 0x00cf, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0x00d0, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0x00d1, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0x00d2, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0x00d3, # CYRILLIC CAPITAL LETTER U + 0x0424: 0x00d4, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0x00d5, # CYRILLIC CAPITAL LETTER HA + 0x0426: 
0x00d6, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0x00d7, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0x00d8, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0x00d9, # CYRILLIC CAPITAL LETTER SHCHA + 0x042a: 0x00da, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042b: 0x00db, # CYRILLIC CAPITAL LETTER YERU + 0x042c: 0x00dc, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042d: 0x00dd, # CYRILLIC CAPITAL LETTER E + 0x042e: 0x00de, # CYRILLIC CAPITAL LETTER YU + 0x042f: 0x00df, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0x00e0, # CYRILLIC SMALL LETTER A + 0x0431: 0x00e1, # CYRILLIC SMALL LETTER BE + 0x0432: 0x00e2, # CYRILLIC SMALL LETTER VE + 0x0433: 0x00e3, # CYRILLIC SMALL LETTER GHE + 0x0434: 0x00e4, # CYRILLIC SMALL LETTER DE + 0x0435: 0x00e5, # CYRILLIC SMALL LETTER IE + 0x0436: 0x00e6, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0x00e7, # CYRILLIC SMALL LETTER ZE + 0x0438: 0x00e8, # CYRILLIC SMALL LETTER I + 0x0439: 0x00e9, # CYRILLIC SMALL LETTER SHORT I + 0x043a: 0x00ea, # CYRILLIC SMALL LETTER KA + 0x043b: 0x00eb, # CYRILLIC SMALL LETTER EL + 0x043c: 0x00ec, # CYRILLIC SMALL LETTER EM + 0x043d: 0x00ed, # CYRILLIC SMALL LETTER EN + 0x043e: 0x00ee, # CYRILLIC SMALL LETTER O + 0x043f: 0x00ef, # CYRILLIC SMALL LETTER PE + 0x0440: 0x00f0, # CYRILLIC SMALL LETTER ER + 0x0441: 0x00f1, # CYRILLIC SMALL LETTER ES + 0x0442: 0x00f2, # CYRILLIC SMALL LETTER TE + 0x0443: 0x00f3, # CYRILLIC SMALL LETTER U + 0x0444: 0x00f4, # CYRILLIC SMALL LETTER EF + 0x0445: 0x00f5, # CYRILLIC SMALL LETTER HA + 0x0446: 0x00f6, # CYRILLIC SMALL LETTER TSE + 0x0447: 0x00f7, # CYRILLIC SMALL LETTER CHE + 0x0448: 0x00f8, # CYRILLIC SMALL LETTER SHA + 0x0449: 0x00f9, # CYRILLIC SMALL LETTER SHCHA + 0x044a: 0x00fa, # CYRILLIC SMALL LETTER HARD SIGN + 0x044b: 0x00fb, # CYRILLIC SMALL LETTER YERU + 0x044c: 0x00fc, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044d: 0x00fd, # CYRILLIC SMALL LETTER E + 0x044e: 0x00fe, # CYRILLIC SMALL LETTER YU + 0x044f: 0x00ff, # CYRILLIC SMALL LETTER YA + 0x0451: 0x00b8, # CYRILLIC SMALL LETTER IO + 0x0452: 
0x0090, # CYRILLIC SMALL LETTER DJE + 0x0453: 0x0083, # CYRILLIC SMALL LETTER GJE + 0x0454: 0x00ba, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0455: 0x00be, # CYRILLIC SMALL LETTER DZE + 0x0456: 0x00b3, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0457: 0x00bf, # CYRILLIC SMALL LETTER YI + 0x0458: 0x00bc, # CYRILLIC SMALL LETTER JE + 0x0459: 0x009a, # CYRILLIC SMALL LETTER LJE + 0x045a: 0x009c, # CYRILLIC SMALL LETTER NJE + 0x045b: 0x009e, # CYRILLIC SMALL LETTER TSHE + 0x045c: 0x009d, # CYRILLIC SMALL LETTER KJE + 0x045e: 0x00a2, # CYRILLIC SMALL LETTER SHORT U + 0x045f: 0x009f, # CYRILLIC SMALL LETTER DZHE + 0x0490: 0x00a5, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN + 0x0491: 0x00b4, # CYRILLIC SMALL LETTER GHE WITH UPTURN + 0x2013: 0x0096, # EN DASH + 0x2014: 0x0097, # EM DASH + 0x2018: 0x0091, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x0092, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x0082, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x0093, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x0094, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x0084, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x0086, # DAGGER + 0x2021: 0x0087, # DOUBLE DAGGER + 0x2022: 0x0095, # BULLET + 0x2026: 0x0085, # HORIZONTAL ELLIPSIS + 0x2030: 0x0089, # PER MILLE SIGN + 0x2039: 0x008b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x009b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20ac: 0x0088, # EURO SIGN + 0x2116: 0x00b9, # NUMERO SIGN + 0x2122: 0x0099, # TRADE MARK SIGN +}
\ No newline at end of file diff --git a/Lib/encodings/cp1252.py b/Lib/encodings/cp1252.py index d1ecaba..92537ab 100644 --- a/Lib/encodings/cp1252.py +++ b/Lib/encodings/cp1252.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP1252.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1252.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,40 +32,553 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: None, # UNDEFINED - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x008d: None, # UNDEFINED - 0x008e: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x008f: None, # UNDEFINED - 0x0090: None, # UNDEFINED - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: 0x02dc, # SMALL TILDE - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON 
- 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: 0x0153, # LATIN SMALL LIGATURE OE - 0x009d: None, # UNDEFINED - 0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0080: 0x20ac, # EURO SIGN + 0x0081: None, # UNDEFINED + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE + 0x008d: None, # UNDEFINED + 0x008e: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x008f: None, # UNDEFINED + 0x0090: None, # UNDEFINED + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: 0x02dc, # SMALL TILDE + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: 0x0153, # LATIN SMALL LIGATURE OE + 0x009d: None, # UNDEFINED + 0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + 
u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u20ac' # 0x0080 -> EURO SIGN + u'\ufffe' # 0x0081 -> UNDEFINED + u'\u201a' # 0x0082 -> SINGLE LOW-9 QUOTATION MARK + u'\u0192' # 0x0083 -> LATIN SMALL LETTER F WITH HOOK + u'\u201e' # 0x0084 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x0085 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x0086 -> DAGGER + u'\u2021' # 0x0087 -> DOUBLE DAGGER + u'\u02c6' # 0x0088 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u2030' # 0x0089 -> PER MILLE SIGN + u'\u0160' # 0x008a -> LATIN CAPITAL LETTER S WITH CARON + u'\u2039' # 0x008b -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u0152' # 0x008c -> LATIN CAPITAL LIGATURE OE + u'\ufffe' # 0x008d -> UNDEFINED + u'\u017d' # 0x008e -> LATIN CAPITAL LETTER Z WITH CARON + u'\ufffe' # 0x008f -> UNDEFINED + u'\ufffe' # 0x0090 -> UNDEFINED + u'\u2018' # 0x0091 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x0092 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x0093 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x0094 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x0095 -> BULLET + u'\u2013' # 0x0096 -> EN DASH + u'\u2014' # 0x0097 -> EM DASH + u'\u02dc' # 0x0098 -> SMALL TILDE + u'\u2122' # 0x0099 -> TRADE MARK SIGN + u'\u0161' # 0x009a -> LATIN SMALL LETTER S WITH CARON + u'\u203a' # 0x009b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u0153' # 0x009c -> LATIN SMALL LIGATURE OE + u'\ufffe' # 0x009d -> UNDEFINED + u'\u017e' # 0x009e -> LATIN SMALL LETTER Z WITH CARON + u'\u0178' # 
0x009f -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\xa1' # 0x00a1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\xa5' # 0x00a5 -> YEN SIGN + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\xaa' # 0x00aa -> FEMININE ORDINAL INDICATOR + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\xaf' # 0x00af -> MACRON + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\xb8' # 0x00b8 -> CEDILLA + u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE + u'\xba' # 0x00ba -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0x00bc -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\xbe' # 0x00be -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0x00bf -> INVERTED QUESTION MARK + u'\xc0' # 0x00c0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0x00c3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x00c5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0x00c6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0x00c7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0x00c8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x00ca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + 
u'\xcc' # 0x00cc -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00cf -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xd0' # 0x00d0 -> LATIN CAPITAL LETTER ETH + u'\xd1' # 0x00d1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0x00d2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0x00d5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\xd8' # 0x00d8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0x00d9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00db -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0x00dd -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0x00de -> LATIN CAPITAL LETTER THORN + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0x00e0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0x00e3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0x00e5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0x00e6 -> LATIN SMALL LETTER AE + u'\xe7' # 0x00e7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0x00e8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x00ea -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x00ef -> LATIN SMALL LETTER I WITH DIAERESIS + 
u'\xf0' # 0x00f0 -> LATIN SMALL LETTER ETH + u'\xf1' # 0x00f1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0x00f2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0x00f5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x00f7 -> DIVISION SIGN + u'\xf8' # 0x00f8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0x00f9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0x00fb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0x00fd -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0x00fe -> LATIN SMALL LETTER THORN + u'\xff' # 0x00ff -> LATIN SMALL LETTER Y WITH DIAERESIS +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # 
RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER 
U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a1: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a5: 0x00a5, # YEN SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN 
+ 0x00aa: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00af: 0x00af, # MACRON + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b8: 0x00b8, # CEDILLA + 0x00b9: 0x00b9, # SUPERSCRIPT ONE + 0x00ba: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00be: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00bf: 0x00bf, # INVERTED QUESTION MARK + 0x00c0: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00c7: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d0: 0x00d0, # LATIN CAPITAL LETTER ETH + 0x00d1: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00d4, # LATIN CAPITAL 
LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00d8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00de: 0x00de, # LATIN CAPITAL LETTER THORN + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00e6, # LATIN SMALL LETTER AE + 0x00e7: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f0: 0x00f0, # LATIN SMALL LETTER ETH + 0x00f1: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00f8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00fa, # LATIN SMALL 
LETTER U WITH ACUTE + 0x00fb: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0x00fe, # LATIN SMALL LETTER THORN + 0x00ff: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0152: 0x008c, # LATIN CAPITAL LIGATURE OE + 0x0153: 0x009c, # LATIN SMALL LIGATURE OE + 0x0160: 0x008a, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x009a, # LATIN SMALL LETTER S WITH CARON + 0x0178: 0x009f, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x017d: 0x008e, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x009e, # LATIN SMALL LETTER Z WITH CARON + 0x0192: 0x0083, # LATIN SMALL LETTER F WITH HOOK + 0x02c6: 0x0088, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02dc: 0x0098, # SMALL TILDE + 0x2013: 0x0096, # EN DASH + 0x2014: 0x0097, # EM DASH + 0x2018: 0x0091, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x0092, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x0082, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x0093, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x0094, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x0084, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x0086, # DAGGER + 0x2021: 0x0087, # DOUBLE DAGGER + 0x2022: 0x0095, # BULLET + 0x2026: 0x0085, # HORIZONTAL ELLIPSIS + 0x2030: 0x0089, # PER MILLE SIGN + 0x2039: 0x008b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x009b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20ac: 0x0080, # EURO SIGN + 0x2122: 0x0099, # TRADE MARK SIGN +}
\ No newline at end of file diff --git a/Lib/encodings/cp1253.py b/Lib/encodings/cp1253.py index 22c70df..68e61d3 100644 --- a/Lib/encodings/cp1253.py +++ b/Lib/encodings/cp1253.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP1253.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1253.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,115 +32,616 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: None, # UNDEFINED - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: None, # UNDEFINED - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: None, # UNDEFINED - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: None, # UNDEFINED - 0x008d: None, # UNDEFINED - 0x008e: None, # UNDEFINED - 0x008f: None, # UNDEFINED - 0x0090: None, # UNDEFINED - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: None, # UNDEFINED - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: None, # UNDEFINED - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: None, # UNDEFINED - 0x009d: None, # UNDEFINED - 
0x009e: None, # UNDEFINED - 0x009f: None, # UNDEFINED - 0x00a1: 0x0385, # GREEK DIALYTIKA TONOS - 0x00a2: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x00aa: None, # UNDEFINED - 0x00af: 0x2015, # HORIZONTAL BAR - 0x00b4: 0x0384, # GREEK TONOS - 0x00b8: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x00b9: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x00ba: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x00bc: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x00be: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x00bf: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x00c0: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x00c1: 0x0391, # GREEK CAPITAL LETTER ALPHA - 0x00c2: 0x0392, # GREEK CAPITAL LETTER BETA - 0x00c3: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00c4: 0x0394, # GREEK CAPITAL LETTER DELTA - 0x00c5: 0x0395, # GREEK CAPITAL LETTER EPSILON - 0x00c6: 0x0396, # GREEK CAPITAL LETTER ZETA - 0x00c7: 0x0397, # GREEK CAPITAL LETTER ETA - 0x00c8: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00c9: 0x0399, # GREEK CAPITAL LETTER IOTA - 0x00ca: 0x039a, # GREEK CAPITAL LETTER KAPPA - 0x00cb: 0x039b, # GREEK CAPITAL LETTER LAMDA - 0x00cc: 0x039c, # GREEK CAPITAL LETTER MU - 0x00cd: 0x039d, # GREEK CAPITAL LETTER NU - 0x00ce: 0x039e, # GREEK CAPITAL LETTER XI - 0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON - 0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI - 0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO - 0x00d2: None, # UNDEFINED - 0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU - 0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON - 0x00d6: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00d7: 0x03a7, # GREEK CAPITAL LETTER CHI - 0x00d8: 0x03a8, # GREEK CAPITAL LETTER PSI - 0x00d9: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00da: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x00db: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x00dc: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x00dd: 
0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x00de: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS - 0x00df: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS - 0x00e0: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e2: 0x03b2, # GREEK SMALL LETTER BETA - 0x00e3: 0x03b3, # GREEK SMALL LETTER GAMMA - 0x00e4: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00e6: 0x03b6, # GREEK SMALL LETTER ZETA - 0x00e7: 0x03b7, # GREEK SMALL LETTER ETA - 0x00e8: 0x03b8, # GREEK SMALL LETTER THETA - 0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA - 0x00ea: 0x03ba, # GREEK SMALL LETTER KAPPA - 0x00eb: 0x03bb, # GREEK SMALL LETTER LAMDA - 0x00ec: 0x03bc, # GREEK SMALL LETTER MU - 0x00ed: 0x03bd, # GREEK SMALL LETTER NU - 0x00ee: 0x03be, # GREEK SMALL LETTER XI - 0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON - 0x00f0: 0x03c0, # GREEK SMALL LETTER PI - 0x00f1: 0x03c1, # GREEK SMALL LETTER RHO - 0x00f2: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA - 0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00f4: 0x03c4, # GREEK SMALL LETTER TAU - 0x00f5: 0x03c5, # GREEK SMALL LETTER UPSILON - 0x00f6: 0x03c6, # GREEK SMALL LETTER PHI - 0x00f7: 0x03c7, # GREEK SMALL LETTER CHI - 0x00f8: 0x03c8, # GREEK SMALL LETTER PSI - 0x00f9: 0x03c9, # GREEK SMALL LETTER OMEGA - 0x00fa: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x00ff: None, # UNDEFINED + 0x0080: 0x20ac, # EURO SIGN + 0x0081: None, # UNDEFINED + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: None, # UNDEFINED + 
0x0089: 0x2030, # PER MILLE SIGN + 0x008a: None, # UNDEFINED + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: None, # UNDEFINED + 0x008d: None, # UNDEFINED + 0x008e: None, # UNDEFINED + 0x008f: None, # UNDEFINED + 0x0090: None, # UNDEFINED + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: None, # UNDEFINED + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: None, # UNDEFINED + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: None, # UNDEFINED + 0x009d: None, # UNDEFINED + 0x009e: None, # UNDEFINED + 0x009f: None, # UNDEFINED + 0x00a1: 0x0385, # GREEK DIALYTIKA TONOS + 0x00a2: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x00aa: None, # UNDEFINED + 0x00af: 0x2015, # HORIZONTAL BAR + 0x00b4: 0x0384, # GREEK TONOS + 0x00b8: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x00b9: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x00ba: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x00bc: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x00be: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x00bf: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x00c0: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x00c1: 0x0391, # GREEK CAPITAL LETTER ALPHA + 0x00c2: 0x0392, # GREEK CAPITAL LETTER BETA + 0x00c3: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00c4: 0x0394, # GREEK CAPITAL LETTER DELTA + 0x00c5: 0x0395, # GREEK CAPITAL LETTER EPSILON + 0x00c6: 0x0396, # GREEK CAPITAL LETTER ZETA + 0x00c7: 0x0397, # GREEK CAPITAL LETTER ETA + 0x00c8: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00c9: 0x0399, # GREEK CAPITAL LETTER IOTA + 0x00ca: 0x039a, # GREEK CAPITAL LETTER KAPPA + 0x00cb: 0x039b, # GREEK CAPITAL LETTER LAMDA + 0x00cc: 0x039c, # GREEK CAPITAL LETTER MU + 0x00cd: 0x039d, # 
GREEK CAPITAL LETTER NU + 0x00ce: 0x039e, # GREEK CAPITAL LETTER XI + 0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON + 0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI + 0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO + 0x00d2: None, # UNDEFINED + 0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU + 0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON + 0x00d6: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00d7: 0x03a7, # GREEK CAPITAL LETTER CHI + 0x00d8: 0x03a8, # GREEK CAPITAL LETTER PSI + 0x00d9: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00da: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x00db: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x00dc: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x00dd: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x00de: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS + 0x00df: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS + 0x00e0: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e2: 0x03b2, # GREEK SMALL LETTER BETA + 0x00e3: 0x03b3, # GREEK SMALL LETTER GAMMA + 0x00e4: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00e6: 0x03b6, # GREEK SMALL LETTER ZETA + 0x00e7: 0x03b7, # GREEK SMALL LETTER ETA + 0x00e8: 0x03b8, # GREEK SMALL LETTER THETA + 0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA + 0x00ea: 0x03ba, # GREEK SMALL LETTER KAPPA + 0x00eb: 0x03bb, # GREEK SMALL LETTER LAMDA + 0x00ec: 0x03bc, # GREEK SMALL LETTER MU + 0x00ed: 0x03bd, # GREEK SMALL LETTER NU + 0x00ee: 0x03be, # GREEK SMALL LETTER XI + 0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON + 0x00f0: 0x03c0, # GREEK SMALL LETTER PI + 0x00f1: 0x03c1, # GREEK SMALL LETTER RHO + 0x00f2: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA + 0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00f4: 0x03c4, # GREEK SMALL LETTER TAU + 0x00f5: 0x03c5, # GREEK SMALL LETTER UPSILON + 0x00f6: 0x03c6, # GREEK SMALL LETTER PHI + 0x00f7: 0x03c7, # GREEK SMALL LETTER CHI 
+ 0x00f8: 0x03c8, # GREEK SMALL LETTER PSI + 0x00f9: 0x03c9, # GREEK SMALL LETTER OMEGA + 0x00fa: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x00ff: None, # UNDEFINED }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' 
# 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 
0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u20ac' # 0x0080 -> EURO SIGN + u'\ufffe' # 0x0081 -> UNDEFINED + u'\u201a' # 0x0082 -> SINGLE LOW-9 QUOTATION MARK + u'\u0192' # 0x0083 -> LATIN SMALL LETTER F WITH HOOK + u'\u201e' # 0x0084 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x0085 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x0086 -> DAGGER + u'\u2021' # 0x0087 -> DOUBLE DAGGER + u'\ufffe' # 0x0088 -> UNDEFINED + u'\u2030' # 0x0089 -> PER MILLE SIGN + u'\ufffe' # 0x008a -> UNDEFINED + u'\u2039' # 0x008b -> SINGLE LEFT-POINTING ANGLE QUOTATION 
MARK + u'\ufffe' # 0x008c -> UNDEFINED + u'\ufffe' # 0x008d -> UNDEFINED + u'\ufffe' # 0x008e -> UNDEFINED + u'\ufffe' # 0x008f -> UNDEFINED + u'\ufffe' # 0x0090 -> UNDEFINED + u'\u2018' # 0x0091 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x0092 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x0093 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x0094 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x0095 -> BULLET + u'\u2013' # 0x0096 -> EN DASH + u'\u2014' # 0x0097 -> EM DASH + u'\ufffe' # 0x0098 -> UNDEFINED + u'\u2122' # 0x0099 -> TRADE MARK SIGN + u'\ufffe' # 0x009a -> UNDEFINED + u'\u203a' # 0x009b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x009c -> UNDEFINED + u'\ufffe' # 0x009d -> UNDEFINED + u'\ufffe' # 0x009e -> UNDEFINED + u'\ufffe' # 0x009f -> UNDEFINED + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u0385' # 0x00a1 -> GREEK DIALYTIKA TONOS + u'\u0386' # 0x00a2 -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\xa5' # 0x00a5 -> YEN SIGN + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\ufffe' # 0x00aa -> UNDEFINED + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\u2015' # 0x00af -> HORIZONTAL BAR + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\u0384' # 0x00b4 -> GREEK TONOS + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\u0388' # 0x00b8 -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u0389' # 0x00b9 -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\u038a' # 0x00ba -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u038c' # 0x00bc -> GREEK CAPITAL LETTER OMICRON 
WITH TONOS + u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\u038e' # 0x00be -> GREEK CAPITAL LETTER UPSILON WITH TONOS + u'\u038f' # 0x00bf -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'\u0390' # 0x00c0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + u'\u0391' # 0x00c1 -> GREEK CAPITAL LETTER ALPHA + u'\u0392' # 0x00c2 -> GREEK CAPITAL LETTER BETA + u'\u0393' # 0x00c3 -> GREEK CAPITAL LETTER GAMMA + u'\u0394' # 0x00c4 -> GREEK CAPITAL LETTER DELTA + u'\u0395' # 0x00c5 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0x00c6 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0x00c7 -> GREEK CAPITAL LETTER ETA + u'\u0398' # 0x00c8 -> GREEK CAPITAL LETTER THETA + u'\u0399' # 0x00c9 -> GREEK CAPITAL LETTER IOTA + u'\u039a' # 0x00ca -> GREEK CAPITAL LETTER KAPPA + u'\u039b' # 0x00cb -> GREEK CAPITAL LETTER LAMDA + u'\u039c' # 0x00cc -> GREEK CAPITAL LETTER MU + u'\u039d' # 0x00cd -> GREEK CAPITAL LETTER NU + u'\u039e' # 0x00ce -> GREEK CAPITAL LETTER XI + u'\u039f' # 0x00cf -> GREEK CAPITAL LETTER OMICRON + u'\u03a0' # 0x00d0 -> GREEK CAPITAL LETTER PI + u'\u03a1' # 0x00d1 -> GREEK CAPITAL LETTER RHO + u'\ufffe' # 0x00d2 -> UNDEFINED + u'\u03a3' # 0x00d3 -> GREEK CAPITAL LETTER SIGMA + u'\u03a4' # 0x00d4 -> GREEK CAPITAL LETTER TAU + u'\u03a5' # 0x00d5 -> GREEK CAPITAL LETTER UPSILON + u'\u03a6' # 0x00d6 -> GREEK CAPITAL LETTER PHI + u'\u03a7' # 0x00d7 -> GREEK CAPITAL LETTER CHI + u'\u03a8' # 0x00d8 -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0x00d9 -> GREEK CAPITAL LETTER OMEGA + u'\u03aa' # 0x00da -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\u03ab' # 0x00db -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'\u03ac' # 0x00dc -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\u03ad' # 0x00dd -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0x00de -> GREEK SMALL LETTER ETA WITH TONOS + u'\u03af' # 0x00df -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03b0' # 0x00e0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + u'\u03b1' # 0x00e1 -> GREEK SMALL LETTER ALPHA + 
u'\u03b2' # 0x00e2 -> GREEK SMALL LETTER BETA + u'\u03b3' # 0x00e3 -> GREEK SMALL LETTER GAMMA + u'\u03b4' # 0x00e4 -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0x00e5 -> GREEK SMALL LETTER EPSILON + u'\u03b6' # 0x00e6 -> GREEK SMALL LETTER ZETA + u'\u03b7' # 0x00e7 -> GREEK SMALL LETTER ETA + u'\u03b8' # 0x00e8 -> GREEK SMALL LETTER THETA + u'\u03b9' # 0x00e9 -> GREEK SMALL LETTER IOTA + u'\u03ba' # 0x00ea -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0x00eb -> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0x00ec -> GREEK SMALL LETTER MU + u'\u03bd' # 0x00ed -> GREEK SMALL LETTER NU + u'\u03be' # 0x00ee -> GREEK SMALL LETTER XI + u'\u03bf' # 0x00ef -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0x00f0 -> GREEK SMALL LETTER PI + u'\u03c1' # 0x00f1 -> GREEK SMALL LETTER RHO + u'\u03c2' # 0x00f2 -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c3' # 0x00f3 -> GREEK SMALL LETTER SIGMA + u'\u03c4' # 0x00f4 -> GREEK SMALL LETTER TAU + u'\u03c5' # 0x00f5 -> GREEK SMALL LETTER UPSILON + u'\u03c6' # 0x00f6 -> GREEK SMALL LETTER PHI + u'\u03c7' # 0x00f7 -> GREEK SMALL LETTER CHI + u'\u03c8' # 0x00f8 -> GREEK SMALL LETTER PSI + u'\u03c9' # 0x00f9 -> GREEK SMALL LETTER OMEGA + u'\u03ca' # 0x00fa -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u03cb' # 0x00fb -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u03cc' # 0x00fc -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u03cd' # 0x00fd -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u03ce' # 0x00fe -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\ufffe' # 0x00ff -> UNDEFINED +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 
0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL 
LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 
0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a5: 0x00a5, # YEN SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x0192: 0x0083, # LATIN SMALL LETTER F WITH HOOK + 0x0384: 0x00b4, # GREEK TONOS + 0x0385: 0x00a1, # GREEK DIALYTIKA TONOS + 0x0386: 0x00a2, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0388: 0x00b8, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x0389: 0x00b9, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x038a: 0x00ba, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x038c: 0x00bc, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x038e: 0x00be, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x038f: 0x00bf, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0390: 0x00c0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x0391: 0x00c1, # GREEK CAPITAL LETTER ALPHA + 0x0392: 0x00c2, # GREEK CAPITAL LETTER BETA + 0x0393: 0x00c3, # GREEK CAPITAL LETTER GAMMA + 0x0394: 0x00c4, # GREEK CAPITAL LETTER DELTA + 0x0395: 0x00c5, # GREEK CAPITAL LETTER EPSILON + 0x0396: 0x00c6, # GREEK CAPITAL LETTER ZETA + 0x0397: 0x00c7, # GREEK 
CAPITAL LETTER ETA + 0x0398: 0x00c8, # GREEK CAPITAL LETTER THETA + 0x0399: 0x00c9, # GREEK CAPITAL LETTER IOTA + 0x039a: 0x00ca, # GREEK CAPITAL LETTER KAPPA + 0x039b: 0x00cb, # GREEK CAPITAL LETTER LAMDA + 0x039c: 0x00cc, # GREEK CAPITAL LETTER MU + 0x039d: 0x00cd, # GREEK CAPITAL LETTER NU + 0x039e: 0x00ce, # GREEK CAPITAL LETTER XI + 0x039f: 0x00cf, # GREEK CAPITAL LETTER OMICRON + 0x03a0: 0x00d0, # GREEK CAPITAL LETTER PI + 0x03a1: 0x00d1, # GREEK CAPITAL LETTER RHO + 0x03a3: 0x00d3, # GREEK CAPITAL LETTER SIGMA + 0x03a4: 0x00d4, # GREEK CAPITAL LETTER TAU + 0x03a5: 0x00d5, # GREEK CAPITAL LETTER UPSILON + 0x03a6: 0x00d6, # GREEK CAPITAL LETTER PHI + 0x03a7: 0x00d7, # GREEK CAPITAL LETTER CHI + 0x03a8: 0x00d8, # GREEK CAPITAL LETTER PSI + 0x03a9: 0x00d9, # GREEK CAPITAL LETTER OMEGA + 0x03aa: 0x00da, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x03ab: 0x00db, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x03ac: 0x00dc, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x03ad: 0x00dd, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x03ae: 0x00de, # GREEK SMALL LETTER ETA WITH TONOS + 0x03af: 0x00df, # GREEK SMALL LETTER IOTA WITH TONOS + 0x03b0: 0x00e0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x03b1: 0x00e1, # GREEK SMALL LETTER ALPHA + 0x03b2: 0x00e2, # GREEK SMALL LETTER BETA + 0x03b3: 0x00e3, # GREEK SMALL LETTER GAMMA + 0x03b4: 0x00e4, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00e5, # GREEK SMALL LETTER EPSILON + 0x03b6: 0x00e6, # GREEK SMALL LETTER ZETA + 0x03b7: 0x00e7, # GREEK SMALL LETTER ETA + 0x03b8: 0x00e8, # GREEK SMALL LETTER THETA + 0x03b9: 0x00e9, # GREEK SMALL LETTER IOTA + 0x03ba: 0x00ea, # GREEK SMALL LETTER KAPPA + 0x03bb: 0x00eb, # GREEK SMALL LETTER LAMDA + 0x03bc: 0x00ec, # GREEK SMALL LETTER MU + 0x03bd: 0x00ed, # GREEK SMALL LETTER NU + 0x03be: 0x00ee, # GREEK SMALL LETTER XI + 0x03bf: 0x00ef, # GREEK SMALL LETTER OMICRON + 0x03c0: 0x00f0, # GREEK SMALL LETTER PI + 0x03c1: 0x00f1, # GREEK SMALL LETTER RHO + 0x03c2: 0x00f2, # GREEK 
SMALL LETTER FINAL SIGMA + 0x03c3: 0x00f3, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00f4, # GREEK SMALL LETTER TAU + 0x03c5: 0x00f5, # GREEK SMALL LETTER UPSILON + 0x03c6: 0x00f6, # GREEK SMALL LETTER PHI + 0x03c7: 0x00f7, # GREEK SMALL LETTER CHI + 0x03c8: 0x00f8, # GREEK SMALL LETTER PSI + 0x03c9: 0x00f9, # GREEK SMALL LETTER OMEGA + 0x03ca: 0x00fa, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x03cb: 0x00fb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03cc: 0x00fc, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03cd: 0x00fd, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03ce: 0x00fe, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x2013: 0x0096, # EN DASH + 0x2014: 0x0097, # EM DASH + 0x2015: 0x00af, # HORIZONTAL BAR + 0x2018: 0x0091, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x0092, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x0082, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x0093, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x0094, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x0084, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x0086, # DAGGER + 0x2021: 0x0087, # DOUBLE DAGGER + 0x2022: 0x0095, # BULLET + 0x2026: 0x0085, # HORIZONTAL ELLIPSIS + 0x2030: 0x0089, # PER MILLE SIGN + 0x2039: 0x008b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x009b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20ac: 0x0080, # EURO SIGN + 0x2122: 0x0099, # TRADE MARK SIGN +}
\ No newline at end of file diff --git a/Lib/encodings/cp1254.py b/Lib/encodings/cp1254.py index 718ed27..50c4e26 100644 --- a/Lib/encodings/cp1254.py +++ b/Lib/encodings/cp1254.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP1254.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1254.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,46 +32,557 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: None, # UNDEFINED - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x008d: None, # UNDEFINED - 0x008e: None, # UNDEFINED - 0x008f: None, # UNDEFINED - 0x0090: None, # UNDEFINED - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: 0x02dc, # SMALL TILDE - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x009b: 0x203a, # SINGLE 
RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: 0x0153, # LATIN SMALL LIGATURE OE - 0x009d: None, # UNDEFINED - 0x009e: None, # UNDEFINED - 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE - 0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE - 0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x0080: 0x20ac, # EURO SIGN + 0x0081: None, # UNDEFINED + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE + 0x008d: None, # UNDEFINED + 0x008e: None, # UNDEFINED + 0x008f: None, # UNDEFINED + 0x0090: None, # UNDEFINED + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: 0x02dc, # SMALL TILDE + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: 0x0153, # LATIN SMALL LIGATURE OE + 0x009d: None, # UNDEFINED + 0x009e: None, # UNDEFINED + 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE + 0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x00f0: 0x011f, # LATIN SMALL 
LETTER G WITH BREVE + 0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u20ac' # 0x0080 -> EURO SIGN + u'\ufffe' # 0x0081 -> UNDEFINED + u'\u201a' # 0x0082 -> SINGLE LOW-9 QUOTATION MARK + u'\u0192' # 0x0083 -> LATIN SMALL LETTER F WITH HOOK + u'\u201e' # 0x0084 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x0085 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x0086 -> DAGGER + u'\u2021' # 0x0087 -> DOUBLE DAGGER + u'\u02c6' # 0x0088 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u2030' # 0x0089 -> PER MILLE SIGN + u'\u0160' # 0x008a -> LATIN CAPITAL LETTER S WITH CARON + u'\u2039' # 0x008b -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u0152' # 0x008c -> LATIN CAPITAL LIGATURE OE + u'\ufffe' # 0x008d -> UNDEFINED + u'\ufffe' # 0x008e -> UNDEFINED + u'\ufffe' # 0x008f -> UNDEFINED + u'\ufffe' # 0x0090 -> UNDEFINED + u'\u2018' # 0x0091 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x0092 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x0093 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 
0x0094 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x0095 -> BULLET + u'\u2013' # 0x0096 -> EN DASH + u'\u2014' # 0x0097 -> EM DASH + u'\u02dc' # 0x0098 -> SMALL TILDE + u'\u2122' # 0x0099 -> TRADE MARK SIGN + u'\u0161' # 0x009a -> LATIN SMALL LETTER S WITH CARON + u'\u203a' # 0x009b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u0153' # 0x009c -> LATIN SMALL LIGATURE OE + u'\ufffe' # 0x009d -> UNDEFINED + u'\ufffe' # 0x009e -> UNDEFINED + u'\u0178' # 0x009f -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\xa1' # 0x00a1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\xa5' # 0x00a5 -> YEN SIGN + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\xaa' # 0x00aa -> FEMININE ORDINAL INDICATOR + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\xaf' # 0x00af -> MACRON + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\xb8' # 0x00b8 -> CEDILLA + u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE + u'\xba' # 0x00ba -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0x00bc -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\xbe' # 0x00be -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0x00bf -> INVERTED QUESTION MARK + u'\xc0' # 0x00c0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0x00c3 -> LATIN CAPITAL LETTER A WITH 
TILDE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x00c5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0x00c6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0x00c7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0x00c8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x00ca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0x00cc -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00cf -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u011e' # 0x00d0 -> LATIN CAPITAL LETTER G WITH BREVE + u'\xd1' # 0x00d1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0x00d2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0x00d5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\xd8' # 0x00d8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0x00d9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00db -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0130' # 0x00dd -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'\u015e' # 0x00de -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0x00e0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0x00e3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0x00e5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0x00e6 -> LATIN SMALL LETTER AE + u'\xe7' # 
0x00e7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0x00e8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x00ea -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x00ef -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u011f' # 0x00f0 -> LATIN SMALL LETTER G WITH BREVE + u'\xf1' # 0x00f1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0x00f2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0x00f5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x00f7 -> DIVISION SIGN + u'\xf8' # 0x00f8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0x00f9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0x00fb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u0131' # 0x00fd -> LATIN SMALL LETTER DOTLESS I + u'\u015f' # 0x00fe -> LATIN SMALL LETTER S WITH CEDILLA + u'\xff' # 0x00ff -> LATIN SMALL LETTER Y WITH DIAERESIS +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 
0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL 
LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, 
# LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a1: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a5: 0x00a5, # YEN SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00aa: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00af: 0x00af, # MACRON + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b8: 0x00b8, # CEDILLA + 0x00b9: 0x00b9, # SUPERSCRIPT ONE + 0x00ba: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00be: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00bf: 0x00bf, # INVERTED QUESTION MARK + 0x00c0: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00c7: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 
0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d1: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00d8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00e6, # LATIN SMALL LETTER AE + 0x00e7: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 
0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00f8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x011e: 0x00d0, # LATIN CAPITAL LETTER G WITH BREVE + 0x011f: 0x00f0, # LATIN SMALL LETTER G WITH BREVE + 0x0130: 0x00dd, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0x00fd, # LATIN SMALL LETTER DOTLESS I + 0x0152: 0x008c, # LATIN CAPITAL LIGATURE OE + 0x0153: 0x009c, # LATIN SMALL LIGATURE OE + 0x015e: 0x00de, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015f: 0x00fe, # LATIN SMALL LETTER S WITH CEDILLA + 0x0160: 0x008a, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x009a, # LATIN SMALL LETTER S WITH CARON + 0x0178: 0x009f, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0192: 0x0083, # LATIN SMALL LETTER F WITH HOOK + 0x02c6: 0x0088, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02dc: 0x0098, # SMALL TILDE + 0x2013: 0x0096, # EN DASH + 0x2014: 0x0097, # EM DASH + 0x2018: 0x0091, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x0092, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x0082, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x0093, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x0094, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x0084, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x0086, # DAGGER + 0x2021: 0x0087, # DOUBLE DAGGER + 0x2022: 0x0095, # BULLET + 0x2026: 0x0085, # HORIZONTAL ELLIPSIS + 0x2030: 0x0089, # PER MILLE SIGN + 0x2039: 0x008b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x009b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20ac: 0x0080, # EURO SIGN + 0x2122: 0x0099, # TRADE MARK SIGN +}
\ No newline at end of file diff --git a/Lib/encodings/cp1255.py b/Lib/encodings/cp1255.py index b20f5da..1535906 100644 --- a/Lib/encodings/cp1255.py +++ b/Lib/encodings/cp1255.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP1255.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1255.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,107 +32,602 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: None, # UNDEFINED - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: None, # UNDEFINED - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: None, # UNDEFINED - 0x008d: None, # UNDEFINED - 0x008e: None, # UNDEFINED - 0x008f: None, # UNDEFINED - 0x0090: None, # UNDEFINED - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: 0x02dc, # SMALL TILDE - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: None, # UNDEFINED - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: None, # UNDEFINED - 
0x009d: None, # UNDEFINED - 0x009e: None, # UNDEFINED - 0x009f: None, # UNDEFINED - 0x00a4: 0x20aa, # NEW SHEQEL SIGN - 0x00aa: 0x00d7, # MULTIPLICATION SIGN - 0x00ba: 0x00f7, # DIVISION SIGN - 0x00c0: 0x05b0, # HEBREW POINT SHEVA - 0x00c1: 0x05b1, # HEBREW POINT HATAF SEGOL - 0x00c2: 0x05b2, # HEBREW POINT HATAF PATAH - 0x00c3: 0x05b3, # HEBREW POINT HATAF QAMATS - 0x00c4: 0x05b4, # HEBREW POINT HIRIQ - 0x00c5: 0x05b5, # HEBREW POINT TSERE - 0x00c6: 0x05b6, # HEBREW POINT SEGOL - 0x00c7: 0x05b7, # HEBREW POINT PATAH - 0x00c8: 0x05b8, # HEBREW POINT QAMATS - 0x00c9: 0x05b9, # HEBREW POINT HOLAM - 0x00ca: None, # UNDEFINED - 0x00cb: 0x05bb, # HEBREW POINT QUBUTS - 0x00cc: 0x05bc, # HEBREW POINT DAGESH OR MAPIQ - 0x00cd: 0x05bd, # HEBREW POINT METEG - 0x00ce: 0x05be, # HEBREW PUNCTUATION MAQAF - 0x00cf: 0x05bf, # HEBREW POINT RAFE - 0x00d0: 0x05c0, # HEBREW PUNCTUATION PASEQ - 0x00d1: 0x05c1, # HEBREW POINT SHIN DOT - 0x00d2: 0x05c2, # HEBREW POINT SIN DOT - 0x00d3: 0x05c3, # HEBREW PUNCTUATION SOF PASUQ - 0x00d4: 0x05f0, # HEBREW LIGATURE YIDDISH DOUBLE VAV - 0x00d5: 0x05f1, # HEBREW LIGATURE YIDDISH VAV YOD - 0x00d6: 0x05f2, # HEBREW LIGATURE YIDDISH DOUBLE YOD - 0x00d7: 0x05f3, # HEBREW PUNCTUATION GERESH - 0x00d8: 0x05f4, # HEBREW PUNCTUATION GERSHAYIM - 0x00d9: None, # UNDEFINED - 0x00da: None, # UNDEFINED - 0x00db: None, # UNDEFINED - 0x00dc: None, # UNDEFINED - 0x00dd: None, # UNDEFINED - 0x00de: None, # UNDEFINED - 0x00df: None, # UNDEFINED - 0x00e0: 0x05d0, # HEBREW LETTER ALEF - 0x00e1: 0x05d1, # HEBREW LETTER BET - 0x00e2: 0x05d2, # HEBREW LETTER GIMEL - 0x00e3: 0x05d3, # HEBREW LETTER DALET - 0x00e4: 0x05d4, # HEBREW LETTER HE - 0x00e5: 0x05d5, # HEBREW LETTER VAV - 0x00e6: 0x05d6, # HEBREW LETTER ZAYIN - 0x00e7: 0x05d7, # HEBREW LETTER HET - 0x00e8: 0x05d8, # HEBREW LETTER TET - 0x00e9: 0x05d9, # HEBREW LETTER YOD - 0x00ea: 0x05da, # HEBREW LETTER FINAL KAF - 0x00eb: 0x05db, # HEBREW LETTER KAF - 0x00ec: 0x05dc, # HEBREW LETTER LAMED - 0x00ed: 0x05dd, # 
HEBREW LETTER FINAL MEM - 0x00ee: 0x05de, # HEBREW LETTER MEM - 0x00ef: 0x05df, # HEBREW LETTER FINAL NUN - 0x00f0: 0x05e0, # HEBREW LETTER NUN - 0x00f1: 0x05e1, # HEBREW LETTER SAMEKH - 0x00f2: 0x05e2, # HEBREW LETTER AYIN - 0x00f3: 0x05e3, # HEBREW LETTER FINAL PE - 0x00f4: 0x05e4, # HEBREW LETTER PE - 0x00f5: 0x05e5, # HEBREW LETTER FINAL TSADI - 0x00f6: 0x05e6, # HEBREW LETTER TSADI - 0x00f7: 0x05e7, # HEBREW LETTER QOF - 0x00f8: 0x05e8, # HEBREW LETTER RESH - 0x00f9: 0x05e9, # HEBREW LETTER SHIN - 0x00fa: 0x05ea, # HEBREW LETTER TAV - 0x00fb: None, # UNDEFINED - 0x00fc: None, # UNDEFINED - 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK - 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK - 0x00ff: None, # UNDEFINED + 0x0080: 0x20ac, # EURO SIGN + 0x0081: None, # UNDEFINED + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: None, # UNDEFINED + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: None, # UNDEFINED + 0x008d: None, # UNDEFINED + 0x008e: None, # UNDEFINED + 0x008f: None, # UNDEFINED + 0x0090: None, # UNDEFINED + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: 0x02dc, # SMALL TILDE + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: None, # UNDEFINED + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: None, # UNDEFINED + 0x009d: None, # UNDEFINED + 0x009e: None, # UNDEFINED + 0x009f: None, # UNDEFINED + 0x00a4: 0x20aa, # NEW SHEQEL SIGN + 0x00aa: 0x00d7, # MULTIPLICATION SIGN + 0x00ba: 0x00f7, # DIVISION SIGN + 0x00c0: 
0x05b0, # HEBREW POINT SHEVA + 0x00c1: 0x05b1, # HEBREW POINT HATAF SEGOL + 0x00c2: 0x05b2, # HEBREW POINT HATAF PATAH + 0x00c3: 0x05b3, # HEBREW POINT HATAF QAMATS + 0x00c4: 0x05b4, # HEBREW POINT HIRIQ + 0x00c5: 0x05b5, # HEBREW POINT TSERE + 0x00c6: 0x05b6, # HEBREW POINT SEGOL + 0x00c7: 0x05b7, # HEBREW POINT PATAH + 0x00c8: 0x05b8, # HEBREW POINT QAMATS + 0x00c9: 0x05b9, # HEBREW POINT HOLAM + 0x00ca: None, # UNDEFINED + 0x00cb: 0x05bb, # HEBREW POINT QUBUTS + 0x00cc: 0x05bc, # HEBREW POINT DAGESH OR MAPIQ + 0x00cd: 0x05bd, # HEBREW POINT METEG + 0x00ce: 0x05be, # HEBREW PUNCTUATION MAQAF + 0x00cf: 0x05bf, # HEBREW POINT RAFE + 0x00d0: 0x05c0, # HEBREW PUNCTUATION PASEQ + 0x00d1: 0x05c1, # HEBREW POINT SHIN DOT + 0x00d2: 0x05c2, # HEBREW POINT SIN DOT + 0x00d3: 0x05c3, # HEBREW PUNCTUATION SOF PASUQ + 0x00d4: 0x05f0, # HEBREW LIGATURE YIDDISH DOUBLE VAV + 0x00d5: 0x05f1, # HEBREW LIGATURE YIDDISH VAV YOD + 0x00d6: 0x05f2, # HEBREW LIGATURE YIDDISH DOUBLE YOD + 0x00d7: 0x05f3, # HEBREW PUNCTUATION GERESH + 0x00d8: 0x05f4, # HEBREW PUNCTUATION GERSHAYIM + 0x00d9: None, # UNDEFINED + 0x00da: None, # UNDEFINED + 0x00db: None, # UNDEFINED + 0x00dc: None, # UNDEFINED + 0x00dd: None, # UNDEFINED + 0x00de: None, # UNDEFINED + 0x00df: None, # UNDEFINED + 0x00e0: 0x05d0, # HEBREW LETTER ALEF + 0x00e1: 0x05d1, # HEBREW LETTER BET + 0x00e2: 0x05d2, # HEBREW LETTER GIMEL + 0x00e3: 0x05d3, # HEBREW LETTER DALET + 0x00e4: 0x05d4, # HEBREW LETTER HE + 0x00e5: 0x05d5, # HEBREW LETTER VAV + 0x00e6: 0x05d6, # HEBREW LETTER ZAYIN + 0x00e7: 0x05d7, # HEBREW LETTER HET + 0x00e8: 0x05d8, # HEBREW LETTER TET + 0x00e9: 0x05d9, # HEBREW LETTER YOD + 0x00ea: 0x05da, # HEBREW LETTER FINAL KAF + 0x00eb: 0x05db, # HEBREW LETTER KAF + 0x00ec: 0x05dc, # HEBREW LETTER LAMED + 0x00ed: 0x05dd, # HEBREW LETTER FINAL MEM + 0x00ee: 0x05de, # HEBREW LETTER MEM + 0x00ef: 0x05df, # HEBREW LETTER FINAL NUN + 0x00f0: 0x05e0, # HEBREW LETTER NUN + 0x00f1: 0x05e1, # HEBREW LETTER SAMEKH + 0x00f2: 0x05e2, 
# HEBREW LETTER AYIN + 0x00f3: 0x05e3, # HEBREW LETTER FINAL PE + 0x00f4: 0x05e4, # HEBREW LETTER PE + 0x00f5: 0x05e5, # HEBREW LETTER FINAL TSADI + 0x00f6: 0x05e6, # HEBREW LETTER TSADI + 0x00f7: 0x05e7, # HEBREW LETTER QOF + 0x00f8: 0x05e8, # HEBREW LETTER RESH + 0x00f9: 0x05e9, # HEBREW LETTER SHIN + 0x00fa: 0x05ea, # HEBREW LETTER TAV + 0x00fb: None, # UNDEFINED + 0x00fc: None, # UNDEFINED + 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK + 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK + 0x00ff: None, # UNDEFINED }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' 
# 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 
0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u20ac' # 0x0080 -> EURO SIGN + u'\ufffe' # 0x0081 -> UNDEFINED + u'\u201a' # 0x0082 -> SINGLE LOW-9 QUOTATION MARK + u'\u0192' # 0x0083 -> LATIN SMALL LETTER F WITH HOOK + u'\u201e' # 0x0084 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x0085 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x0086 -> DAGGER + u'\u2021' # 0x0087 -> DOUBLE DAGGER + u'\u02c6' # 0x0088 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u2030' # 0x0089 -> PER MILLE SIGN + u'\ufffe' # 0x008a -> UNDEFINED + u'\u2039' # 0x008b -> SINGLE 
LEFT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x008c -> UNDEFINED + u'\ufffe' # 0x008d -> UNDEFINED + u'\ufffe' # 0x008e -> UNDEFINED + u'\ufffe' # 0x008f -> UNDEFINED + u'\ufffe' # 0x0090 -> UNDEFINED + u'\u2018' # 0x0091 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x0092 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x0093 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x0094 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x0095 -> BULLET + u'\u2013' # 0x0096 -> EN DASH + u'\u2014' # 0x0097 -> EM DASH + u'\u02dc' # 0x0098 -> SMALL TILDE + u'\u2122' # 0x0099 -> TRADE MARK SIGN + u'\ufffe' # 0x009a -> UNDEFINED + u'\u203a' # 0x009b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x009c -> UNDEFINED + u'\ufffe' # 0x009d -> UNDEFINED + u'\ufffe' # 0x009e -> UNDEFINED + u'\ufffe' # 0x009f -> UNDEFINED + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\xa1' # 0x00a1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\u20aa' # 0x00a4 -> NEW SHEQEL SIGN + u'\xa5' # 0x00a5 -> YEN SIGN + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\xd7' # 0x00aa -> MULTIPLICATION SIGN + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\xaf' # 0x00af -> MACRON + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\xb8' # 0x00b8 -> CEDILLA + u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE + u'\xf7' # 0x00ba -> DIVISION SIGN + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0x00bc -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\xbe' # 0x00be -> VULGAR 
FRACTION THREE QUARTERS + u'\xbf' # 0x00bf -> INVERTED QUESTION MARK + u'\u05b0' # 0x00c0 -> HEBREW POINT SHEVA + u'\u05b1' # 0x00c1 -> HEBREW POINT HATAF SEGOL + u'\u05b2' # 0x00c2 -> HEBREW POINT HATAF PATAH + u'\u05b3' # 0x00c3 -> HEBREW POINT HATAF QAMATS + u'\u05b4' # 0x00c4 -> HEBREW POINT HIRIQ + u'\u05b5' # 0x00c5 -> HEBREW POINT TSERE + u'\u05b6' # 0x00c6 -> HEBREW POINT SEGOL + u'\u05b7' # 0x00c7 -> HEBREW POINT PATAH + u'\u05b8' # 0x00c8 -> HEBREW POINT QAMATS + u'\u05b9' # 0x00c9 -> HEBREW POINT HOLAM + u'\ufffe' # 0x00ca -> UNDEFINED + u'\u05bb' # 0x00cb -> HEBREW POINT QUBUTS + u'\u05bc' # 0x00cc -> HEBREW POINT DAGESH OR MAPIQ + u'\u05bd' # 0x00cd -> HEBREW POINT METEG + u'\u05be' # 0x00ce -> HEBREW PUNCTUATION MAQAF + u'\u05bf' # 0x00cf -> HEBREW POINT RAFE + u'\u05c0' # 0x00d0 -> HEBREW PUNCTUATION PASEQ + u'\u05c1' # 0x00d1 -> HEBREW POINT SHIN DOT + u'\u05c2' # 0x00d2 -> HEBREW POINT SIN DOT + u'\u05c3' # 0x00d3 -> HEBREW PUNCTUATION SOF PASUQ + u'\u05f0' # 0x00d4 -> HEBREW LIGATURE YIDDISH DOUBLE VAV + u'\u05f1' # 0x00d5 -> HEBREW LIGATURE YIDDISH VAV YOD + u'\u05f2' # 0x00d6 -> HEBREW LIGATURE YIDDISH DOUBLE YOD + u'\u05f3' # 0x00d7 -> HEBREW PUNCTUATION GERESH + u'\u05f4' # 0x00d8 -> HEBREW PUNCTUATION GERSHAYIM + u'\ufffe' # 0x00d9 -> UNDEFINED + u'\ufffe' # 0x00da -> UNDEFINED + u'\ufffe' # 0x00db -> UNDEFINED + u'\ufffe' # 0x00dc -> UNDEFINED + u'\ufffe' # 0x00dd -> UNDEFINED + u'\ufffe' # 0x00de -> UNDEFINED + u'\ufffe' # 0x00df -> UNDEFINED + u'\u05d0' # 0x00e0 -> HEBREW LETTER ALEF + u'\u05d1' # 0x00e1 -> HEBREW LETTER BET + u'\u05d2' # 0x00e2 -> HEBREW LETTER GIMEL + u'\u05d3' # 0x00e3 -> HEBREW LETTER DALET + u'\u05d4' # 0x00e4 -> HEBREW LETTER HE + u'\u05d5' # 0x00e5 -> HEBREW LETTER VAV + u'\u05d6' # 0x00e6 -> HEBREW LETTER ZAYIN + u'\u05d7' # 0x00e7 -> HEBREW LETTER HET + u'\u05d8' # 0x00e8 -> HEBREW LETTER TET + u'\u05d9' # 0x00e9 -> HEBREW LETTER YOD + u'\u05da' # 0x00ea -> HEBREW LETTER FINAL KAF + u'\u05db' # 0x00eb -> HEBREW 
LETTER KAF + u'\u05dc' # 0x00ec -> HEBREW LETTER LAMED + u'\u05dd' # 0x00ed -> HEBREW LETTER FINAL MEM + u'\u05de' # 0x00ee -> HEBREW LETTER MEM + u'\u05df' # 0x00ef -> HEBREW LETTER FINAL NUN + u'\u05e0' # 0x00f0 -> HEBREW LETTER NUN + u'\u05e1' # 0x00f1 -> HEBREW LETTER SAMEKH + u'\u05e2' # 0x00f2 -> HEBREW LETTER AYIN + u'\u05e3' # 0x00f3 -> HEBREW LETTER FINAL PE + u'\u05e4' # 0x00f4 -> HEBREW LETTER PE + u'\u05e5' # 0x00f5 -> HEBREW LETTER FINAL TSADI + u'\u05e6' # 0x00f6 -> HEBREW LETTER TSADI + u'\u05e7' # 0x00f7 -> HEBREW LETTER QOF + u'\u05e8' # 0x00f8 -> HEBREW LETTER RESH + u'\u05e9' # 0x00f9 -> HEBREW LETTER SHIN + u'\u05ea' # 0x00fa -> HEBREW LETTER TAV + u'\ufffe' # 0x00fb -> UNDEFINED + u'\ufffe' # 0x00fc -> UNDEFINED + u'\u200e' # 0x00fd -> LEFT-TO-RIGHT MARK + u'\u200f' # 0x00fe -> RIGHT-TO-LEFT MARK + u'\ufffe' # 0x00ff -> UNDEFINED +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 
0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN 
CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a1: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a5: 0x00a5, # YEN SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ab: 0x00ab, 
# LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00af: 0x00af, # MACRON + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b8: 0x00b8, # CEDILLA + 0x00b9: 0x00b9, # SUPERSCRIPT ONE + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00be: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00bf: 0x00bf, # INVERTED QUESTION MARK + 0x00d7: 0x00aa, # MULTIPLICATION SIGN + 0x00f7: 0x00ba, # DIVISION SIGN + 0x0192: 0x0083, # LATIN SMALL LETTER F WITH HOOK + 0x02c6: 0x0088, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02dc: 0x0098, # SMALL TILDE + 0x05b0: 0x00c0, # HEBREW POINT SHEVA + 0x05b1: 0x00c1, # HEBREW POINT HATAF SEGOL + 0x05b2: 0x00c2, # HEBREW POINT HATAF PATAH + 0x05b3: 0x00c3, # HEBREW POINT HATAF QAMATS + 0x05b4: 0x00c4, # HEBREW POINT HIRIQ + 0x05b5: 0x00c5, # HEBREW POINT TSERE + 0x05b6: 0x00c6, # HEBREW POINT SEGOL + 0x05b7: 0x00c7, # HEBREW POINT PATAH + 0x05b8: 0x00c8, # HEBREW POINT QAMATS + 0x05b9: 0x00c9, # HEBREW POINT HOLAM + 0x05bb: 0x00cb, # HEBREW POINT QUBUTS + 0x05bc: 0x00cc, # HEBREW POINT DAGESH OR MAPIQ + 0x05bd: 0x00cd, # HEBREW POINT METEG + 0x05be: 0x00ce, # HEBREW PUNCTUATION MAQAF + 0x05bf: 0x00cf, # HEBREW POINT RAFE + 0x05c0: 0x00d0, # HEBREW PUNCTUATION PASEQ + 0x05c1: 0x00d1, # HEBREW POINT SHIN DOT + 0x05c2: 0x00d2, # HEBREW POINT SIN DOT + 0x05c3: 0x00d3, # HEBREW PUNCTUATION SOF PASUQ + 0x05d0: 0x00e0, # HEBREW LETTER ALEF + 0x05d1: 0x00e1, # HEBREW LETTER BET + 0x05d2: 0x00e2, # HEBREW LETTER GIMEL + 0x05d3: 0x00e3, # HEBREW LETTER DALET + 0x05d4: 0x00e4, # HEBREW LETTER HE + 0x05d5: 0x00e5, # HEBREW LETTER VAV + 
0x05d6: 0x00e6, # HEBREW LETTER ZAYIN + 0x05d7: 0x00e7, # HEBREW LETTER HET + 0x05d8: 0x00e8, # HEBREW LETTER TET + 0x05d9: 0x00e9, # HEBREW LETTER YOD + 0x05da: 0x00ea, # HEBREW LETTER FINAL KAF + 0x05db: 0x00eb, # HEBREW LETTER KAF + 0x05dc: 0x00ec, # HEBREW LETTER LAMED + 0x05dd: 0x00ed, # HEBREW LETTER FINAL MEM + 0x05de: 0x00ee, # HEBREW LETTER MEM + 0x05df: 0x00ef, # HEBREW LETTER FINAL NUN + 0x05e0: 0x00f0, # HEBREW LETTER NUN + 0x05e1: 0x00f1, # HEBREW LETTER SAMEKH + 0x05e2: 0x00f2, # HEBREW LETTER AYIN + 0x05e3: 0x00f3, # HEBREW LETTER FINAL PE + 0x05e4: 0x00f4, # HEBREW LETTER PE + 0x05e5: 0x00f5, # HEBREW LETTER FINAL TSADI + 0x05e6: 0x00f6, # HEBREW LETTER TSADI + 0x05e7: 0x00f7, # HEBREW LETTER QOF + 0x05e8: 0x00f8, # HEBREW LETTER RESH + 0x05e9: 0x00f9, # HEBREW LETTER SHIN + 0x05ea: 0x00fa, # HEBREW LETTER TAV + 0x05f0: 0x00d4, # HEBREW LIGATURE YIDDISH DOUBLE VAV + 0x05f1: 0x00d5, # HEBREW LIGATURE YIDDISH VAV YOD + 0x05f2: 0x00d6, # HEBREW LIGATURE YIDDISH DOUBLE YOD + 0x05f3: 0x00d7, # HEBREW PUNCTUATION GERESH + 0x05f4: 0x00d8, # HEBREW PUNCTUATION GERSHAYIM + 0x200e: 0x00fd, # LEFT-TO-RIGHT MARK + 0x200f: 0x00fe, # RIGHT-TO-LEFT MARK + 0x2013: 0x0096, # EN DASH + 0x2014: 0x0097, # EM DASH + 0x2018: 0x0091, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x0092, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x0082, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x0093, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x0094, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x0084, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x0086, # DAGGER + 0x2021: 0x0087, # DOUBLE DAGGER + 0x2022: 0x0095, # BULLET + 0x2026: 0x0085, # HORIZONTAL ELLIPSIS + 0x2030: 0x0089, # PER MILLE SIGN + 0x2039: 0x008b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x009b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20aa: 0x00a4, # NEW SHEQEL SIGN + 0x20ac: 0x0080, # EURO SIGN + 0x2122: 0x0099, # TRADE MARK SIGN +}
\ No newline at end of file diff --git a/Lib/encodings/cp1256.py b/Lib/encodings/cp1256.py index f3e694c..a913f9c 100644 --- a/Lib/encodings/cp1256.py +++ b/Lib/encodings/cp1256.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP1256.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1256.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,93 +32,611 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: 0x067e, # ARABIC LETTER PEH - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: 0x0679, # ARABIC LETTER TTEH - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x008d: 0x0686, # ARABIC LETTER TCHEH - 0x008e: 0x0698, # ARABIC LETTER JEH - 0x008f: 0x0688, # ARABIC LETTER DDAL - 0x0090: 0x06af, # ARABIC LETTER GAF - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: 0x06a9, # ARABIC LETTER KEHEH - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: 0x0691, # ARABIC 
LETTER RREH - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: 0x0153, # LATIN SMALL LIGATURE OE - 0x009d: 0x200c, # ZERO WIDTH NON-JOINER - 0x009e: 0x200d, # ZERO WIDTH JOINER - 0x009f: 0x06ba, # ARABIC LETTER NOON GHUNNA - 0x00a1: 0x060c, # ARABIC COMMA - 0x00aa: 0x06be, # ARABIC LETTER HEH DOACHASHMEE - 0x00ba: 0x061b, # ARABIC SEMICOLON - 0x00bf: 0x061f, # ARABIC QUESTION MARK - 0x00c0: 0x06c1, # ARABIC LETTER HEH GOAL - 0x00c1: 0x0621, # ARABIC LETTER HAMZA - 0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE - 0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE - 0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE - 0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW - 0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE - 0x00c7: 0x0627, # ARABIC LETTER ALEF - 0x00c8: 0x0628, # ARABIC LETTER BEH - 0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA - 0x00ca: 0x062a, # ARABIC LETTER TEH - 0x00cb: 0x062b, # ARABIC LETTER THEH - 0x00cc: 0x062c, # ARABIC LETTER JEEM - 0x00cd: 0x062d, # ARABIC LETTER HAH - 0x00ce: 0x062e, # ARABIC LETTER KHAH - 0x00cf: 0x062f, # ARABIC LETTER DAL - 0x00d0: 0x0630, # ARABIC LETTER THAL - 0x00d1: 0x0631, # ARABIC LETTER REH - 0x00d2: 0x0632, # ARABIC LETTER ZAIN - 0x00d3: 0x0633, # ARABIC LETTER SEEN - 0x00d4: 0x0634, # ARABIC LETTER SHEEN - 0x00d5: 0x0635, # ARABIC LETTER SAD - 0x00d6: 0x0636, # ARABIC LETTER DAD - 0x00d8: 0x0637, # ARABIC LETTER TAH - 0x00d9: 0x0638, # ARABIC LETTER ZAH - 0x00da: 0x0639, # ARABIC LETTER AIN - 0x00db: 0x063a, # ARABIC LETTER GHAIN - 0x00dc: 0x0640, # ARABIC TATWEEL - 0x00dd: 0x0641, # ARABIC LETTER FEH - 0x00de: 0x0642, # ARABIC LETTER QAF - 0x00df: 0x0643, # ARABIC LETTER KAF - 0x00e1: 0x0644, # ARABIC LETTER LAM - 0x00e3: 0x0645, # ARABIC LETTER MEEM - 0x00e4: 0x0646, # ARABIC LETTER NOON - 0x00e5: 0x0647, # ARABIC LETTER HEH - 0x00e6: 0x0648, # ARABIC LETTER WAW - 0x00ec: 0x0649, # ARABIC LETTER ALEF MAKSURA - 0x00ed: 0x064a, # ARABIC LETTER YEH - 0x00f0: 0x064b, # ARABIC 
FATHATAN - 0x00f1: 0x064c, # ARABIC DAMMATAN - 0x00f2: 0x064d, # ARABIC KASRATAN - 0x00f3: 0x064e, # ARABIC FATHA - 0x00f5: 0x064f, # ARABIC DAMMA - 0x00f6: 0x0650, # ARABIC KASRA - 0x00f8: 0x0651, # ARABIC SHADDA - 0x00fa: 0x0652, # ARABIC SUKUN - 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK - 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK - 0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE + 0x0080: 0x20ac, # EURO SIGN + 0x0081: 0x067e, # ARABIC LETTER PEH + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: 0x0679, # ARABIC LETTER TTEH + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE + 0x008d: 0x0686, # ARABIC LETTER TCHEH + 0x008e: 0x0698, # ARABIC LETTER JEH + 0x008f: 0x0688, # ARABIC LETTER DDAL + 0x0090: 0x06af, # ARABIC LETTER GAF + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: 0x06a9, # ARABIC LETTER KEHEH + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: 0x0691, # ARABIC LETTER RREH + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: 0x0153, # LATIN SMALL LIGATURE OE + 0x009d: 0x200c, # ZERO WIDTH NON-JOINER + 0x009e: 0x200d, # ZERO WIDTH JOINER + 0x009f: 0x06ba, # ARABIC LETTER NOON GHUNNA + 0x00a1: 0x060c, # ARABIC COMMA + 0x00aa: 0x06be, # ARABIC LETTER HEH DOACHASHMEE + 0x00ba: 0x061b, # ARABIC SEMICOLON + 0x00bf: 0x061f, # ARABIC QUESTION MARK + 0x00c0: 0x06c1, # ARABIC LETTER HEH GOAL + 0x00c1: 0x0621, # ARABIC LETTER HAMZA + 0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE + 
0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE + 0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE + 0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW + 0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE + 0x00c7: 0x0627, # ARABIC LETTER ALEF + 0x00c8: 0x0628, # ARABIC LETTER BEH + 0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA + 0x00ca: 0x062a, # ARABIC LETTER TEH + 0x00cb: 0x062b, # ARABIC LETTER THEH + 0x00cc: 0x062c, # ARABIC LETTER JEEM + 0x00cd: 0x062d, # ARABIC LETTER HAH + 0x00ce: 0x062e, # ARABIC LETTER KHAH + 0x00cf: 0x062f, # ARABIC LETTER DAL + 0x00d0: 0x0630, # ARABIC LETTER THAL + 0x00d1: 0x0631, # ARABIC LETTER REH + 0x00d2: 0x0632, # ARABIC LETTER ZAIN + 0x00d3: 0x0633, # ARABIC LETTER SEEN + 0x00d4: 0x0634, # ARABIC LETTER SHEEN + 0x00d5: 0x0635, # ARABIC LETTER SAD + 0x00d6: 0x0636, # ARABIC LETTER DAD + 0x00d8: 0x0637, # ARABIC LETTER TAH + 0x00d9: 0x0638, # ARABIC LETTER ZAH + 0x00da: 0x0639, # ARABIC LETTER AIN + 0x00db: 0x063a, # ARABIC LETTER GHAIN + 0x00dc: 0x0640, # ARABIC TATWEEL + 0x00dd: 0x0641, # ARABIC LETTER FEH + 0x00de: 0x0642, # ARABIC LETTER QAF + 0x00df: 0x0643, # ARABIC LETTER KAF + 0x00e1: 0x0644, # ARABIC LETTER LAM + 0x00e3: 0x0645, # ARABIC LETTER MEEM + 0x00e4: 0x0646, # ARABIC LETTER NOON + 0x00e5: 0x0647, # ARABIC LETTER HEH + 0x00e6: 0x0648, # ARABIC LETTER WAW + 0x00ec: 0x0649, # ARABIC LETTER ALEF MAKSURA + 0x00ed: 0x064a, # ARABIC LETTER YEH + 0x00f0: 0x064b, # ARABIC FATHATAN + 0x00f1: 0x064c, # ARABIC DAMMATAN + 0x00f2: 0x064d, # ARABIC KASRATAN + 0x00f3: 0x064e, # ARABIC FATHA + 0x00f5: 0x064f, # ARABIC DAMMA + 0x00f6: 0x0650, # ARABIC KASRA + 0x00f8: 0x0651, # ARABIC SHADDA + 0x00fa: 0x0652, # ARABIC SUKUN + 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK + 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK + 0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF 
TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u20ac' # 0x0080 -> EURO SIGN + u'\u067e' # 0x0081 -> ARABIC LETTER PEH + u'\u201a' # 0x0082 -> SINGLE LOW-9 QUOTATION MARK + u'\u0192' # 0x0083 -> LATIN SMALL LETTER F WITH HOOK + u'\u201e' # 0x0084 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x0085 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x0086 -> DAGGER + u'\u2021' # 0x0087 -> DOUBLE DAGGER + u'\u02c6' # 0x0088 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u2030' # 0x0089 -> PER MILLE SIGN + u'\u0679' # 0x008a -> ARABIC LETTER TTEH + u'\u2039' # 0x008b -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u0152' # 0x008c -> LATIN CAPITAL LIGATURE OE + u'\u0686' # 0x008d -> ARABIC LETTER TCHEH + u'\u0698' # 0x008e -> ARABIC LETTER JEH + u'\u0688' # 0x008f -> ARABIC LETTER DDAL + u'\u06af' # 0x0090 -> ARABIC LETTER GAF + u'\u2018' # 0x0091 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x0092 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x0093 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x0094 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x0095 -> BULLET + u'\u2013' # 0x0096 -> EN DASH + u'\u2014' # 0x0097 -> EM DASH + u'\u06a9' # 0x0098 -> ARABIC LETTER KEHEH + u'\u2122' # 0x0099 -> TRADE MARK SIGN + u'\u0691' # 0x009a -> ARABIC LETTER RREH + u'\u203a' # 0x009b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u0153' # 0x009c -> LATIN SMALL LIGATURE OE + u'\u200c' # 0x009d -> ZERO WIDTH NON-JOINER + u'\u200d' # 0x009e -> ZERO WIDTH JOINER + u'\u06ba' # 0x009f 
-> ARABIC LETTER NOON GHUNNA + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u060c' # 0x00a1 -> ARABIC COMMA + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\xa5' # 0x00a5 -> YEN SIGN + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u06be' # 0x00aa -> ARABIC LETTER HEH DOACHASHMEE + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\xaf' # 0x00af -> MACRON + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\xb8' # 0x00b8 -> CEDILLA + u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE + u'\u061b' # 0x00ba -> ARABIC SEMICOLON + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0x00bc -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\xbe' # 0x00be -> VULGAR FRACTION THREE QUARTERS + u'\u061f' # 0x00bf -> ARABIC QUESTION MARK + u'\u06c1' # 0x00c0 -> ARABIC LETTER HEH GOAL + u'\u0621' # 0x00c1 -> ARABIC LETTER HAMZA + u'\u0622' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE + u'\u0623' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE + u'\u0624' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE + u'\u0625' # 0x00c5 -> ARABIC LETTER ALEF WITH HAMZA BELOW + u'\u0626' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE + u'\u0627' # 0x00c7 -> ARABIC LETTER ALEF + u'\u0628' # 0x00c8 -> ARABIC LETTER BEH + u'\u0629' # 0x00c9 -> ARABIC LETTER TEH MARBUTA + u'\u062a' # 0x00ca -> ARABIC LETTER TEH + u'\u062b' # 0x00cb -> ARABIC LETTER THEH + u'\u062c' # 0x00cc -> ARABIC LETTER JEEM + u'\u062d' # 0x00cd -> ARABIC LETTER HAH + u'\u062e' # 0x00ce -> 
ARABIC LETTER KHAH + u'\u062f' # 0x00cf -> ARABIC LETTER DAL + u'\u0630' # 0x00d0 -> ARABIC LETTER THAL + u'\u0631' # 0x00d1 -> ARABIC LETTER REH + u'\u0632' # 0x00d2 -> ARABIC LETTER ZAIN + u'\u0633' # 0x00d3 -> ARABIC LETTER SEEN + u'\u0634' # 0x00d4 -> ARABIC LETTER SHEEN + u'\u0635' # 0x00d5 -> ARABIC LETTER SAD + u'\u0636' # 0x00d6 -> ARABIC LETTER DAD + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\u0637' # 0x00d8 -> ARABIC LETTER TAH + u'\u0638' # 0x00d9 -> ARABIC LETTER ZAH + u'\u0639' # 0x00da -> ARABIC LETTER AIN + u'\u063a' # 0x00db -> ARABIC LETTER GHAIN + u'\u0640' # 0x00dc -> ARABIC TATWEEL + u'\u0641' # 0x00dd -> ARABIC LETTER FEH + u'\u0642' # 0x00de -> ARABIC LETTER QAF + u'\u0643' # 0x00df -> ARABIC LETTER KAF + u'\xe0' # 0x00e0 -> LATIN SMALL LETTER A WITH GRAVE + u'\u0644' # 0x00e1 -> ARABIC LETTER LAM + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\u0645' # 0x00e3 -> ARABIC LETTER MEEM + u'\u0646' # 0x00e4 -> ARABIC LETTER NOON + u'\u0647' # 0x00e5 -> ARABIC LETTER HEH + u'\u0648' # 0x00e6 -> ARABIC LETTER WAW + u'\xe7' # 0x00e7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0x00e8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x00ea -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u0649' # 0x00ec -> ARABIC LETTER ALEF MAKSURA + u'\u064a' # 0x00ed -> ARABIC LETTER YEH + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x00ef -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u064b' # 0x00f0 -> ARABIC FATHATAN + u'\u064c' # 0x00f1 -> ARABIC DAMMATAN + u'\u064d' # 0x00f2 -> ARABIC KASRATAN + u'\u064e' # 0x00f3 -> ARABIC FATHA + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u064f' # 0x00f5 -> ARABIC DAMMA + u'\u0650' # 0x00f6 -> ARABIC KASRA + u'\xf7' # 0x00f7 -> DIVISION SIGN + u'\u0651' # 0x00f8 -> ARABIC SHADDA + u'\xf9' # 0x00f9 -> LATIN SMALL LETTER U WITH GRAVE + u'\u0652' # 0x00fa -> 
ARABIC SUKUN + u'\xfb' # 0x00fb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u200e' # 0x00fd -> LEFT-TO-RIGHT MARK + u'\u200f' # 0x00fe -> RIGHT-TO-LEFT MARK + u'\u06d2' # 0x00ff -> ARABIC LETTER YEH BARREE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO 
+ 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL 
LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a5: 0x00a5, # YEN SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00af: 0x00af, # MACRON + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b8: 0x00b8, # CEDILLA + 0x00b9: 0x00b9, # SUPERSCRIPT ONE + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00bc, # VULGAR FRACTION ONE QUARTER + 
0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00be: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00e0: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e7: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00f9: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x00fb: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0152: 0x008c, # LATIN CAPITAL LIGATURE OE + 0x0153: 0x009c, # LATIN SMALL LIGATURE OE + 0x0192: 0x0083, # LATIN SMALL LETTER F WITH HOOK + 0x02c6: 0x0088, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x060c: 0x00a1, # ARABIC COMMA + 0x061b: 0x00ba, # ARABIC SEMICOLON + 0x061f: 0x00bf, # ARABIC QUESTION MARK + 0x0621: 0x00c1, # ARABIC LETTER HAMZA + 0x0622: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE + 0x0623: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE + 0x0624: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE + 0x0625: 0x00c5, # ARABIC LETTER ALEF WITH HAMZA BELOW + 0x0626: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE + 0x0627: 0x00c7, # ARABIC LETTER ALEF + 0x0628: 0x00c8, # ARABIC LETTER BEH + 0x0629: 0x00c9, # ARABIC LETTER TEH MARBUTA + 0x062a: 0x00ca, # ARABIC LETTER TEH + 0x062b: 0x00cb, # ARABIC LETTER THEH + 0x062c: 0x00cc, # ARABIC LETTER JEEM + 0x062d: 0x00cd, # ARABIC LETTER HAH + 0x062e: 0x00ce, # ARABIC LETTER KHAH + 0x062f: 0x00cf, # ARABIC LETTER DAL + 0x0630: 0x00d0, # ARABIC LETTER THAL + 0x0631: 0x00d1, # ARABIC LETTER REH + 0x0632: 0x00d2, # ARABIC LETTER ZAIN + 0x0633: 0x00d3, # 
ARABIC LETTER SEEN + 0x0634: 0x00d4, # ARABIC LETTER SHEEN + 0x0635: 0x00d5, # ARABIC LETTER SAD + 0x0636: 0x00d6, # ARABIC LETTER DAD + 0x0637: 0x00d8, # ARABIC LETTER TAH + 0x0638: 0x00d9, # ARABIC LETTER ZAH + 0x0639: 0x00da, # ARABIC LETTER AIN + 0x063a: 0x00db, # ARABIC LETTER GHAIN + 0x0640: 0x00dc, # ARABIC TATWEEL + 0x0641: 0x00dd, # ARABIC LETTER FEH + 0x0642: 0x00de, # ARABIC LETTER QAF + 0x0643: 0x00df, # ARABIC LETTER KAF + 0x0644: 0x00e1, # ARABIC LETTER LAM + 0x0645: 0x00e3, # ARABIC LETTER MEEM + 0x0646: 0x00e4, # ARABIC LETTER NOON + 0x0647: 0x00e5, # ARABIC LETTER HEH + 0x0648: 0x00e6, # ARABIC LETTER WAW + 0x0649: 0x00ec, # ARABIC LETTER ALEF MAKSURA + 0x064a: 0x00ed, # ARABIC LETTER YEH + 0x064b: 0x00f0, # ARABIC FATHATAN + 0x064c: 0x00f1, # ARABIC DAMMATAN + 0x064d: 0x00f2, # ARABIC KASRATAN + 0x064e: 0x00f3, # ARABIC FATHA + 0x064f: 0x00f5, # ARABIC DAMMA + 0x0650: 0x00f6, # ARABIC KASRA + 0x0651: 0x00f8, # ARABIC SHADDA + 0x0652: 0x00fa, # ARABIC SUKUN + 0x0679: 0x008a, # ARABIC LETTER TTEH + 0x067e: 0x0081, # ARABIC LETTER PEH + 0x0686: 0x008d, # ARABIC LETTER TCHEH + 0x0688: 0x008f, # ARABIC LETTER DDAL + 0x0691: 0x009a, # ARABIC LETTER RREH + 0x0698: 0x008e, # ARABIC LETTER JEH + 0x06a9: 0x0098, # ARABIC LETTER KEHEH + 0x06af: 0x0090, # ARABIC LETTER GAF + 0x06ba: 0x009f, # ARABIC LETTER NOON GHUNNA + 0x06be: 0x00aa, # ARABIC LETTER HEH DOACHASHMEE + 0x06c1: 0x00c0, # ARABIC LETTER HEH GOAL + 0x06d2: 0x00ff, # ARABIC LETTER YEH BARREE + 0x200c: 0x009d, # ZERO WIDTH NON-JOINER + 0x200d: 0x009e, # ZERO WIDTH JOINER + 0x200e: 0x00fd, # LEFT-TO-RIGHT MARK + 0x200f: 0x00fe, # RIGHT-TO-LEFT MARK + 0x2013: 0x0096, # EN DASH + 0x2014: 0x0097, # EM DASH + 0x2018: 0x0091, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x0092, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x0082, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x0093, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x0094, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x0084, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 
0x0086, # DAGGER + 0x2021: 0x0087, # DOUBLE DAGGER + 0x2022: 0x0095, # BULLET + 0x2026: 0x0085, # HORIZONTAL ELLIPSIS + 0x2030: 0x0089, # PER MILLE SIGN + 0x2039: 0x008b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x009b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20ac: 0x0080, # EURO SIGN + 0x2122: 0x0099, # TRADE MARK SIGN +}
\ No newline at end of file diff --git a/Lib/encodings/cp1257.py b/Lib/encodings/cp1257.py index 1a21850..3565475 100644 --- a/Lib/encodings/cp1257.py +++ b/Lib/encodings/cp1257.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP1257.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1257.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,95 +32,601 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: None, # UNDEFINED - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: None, # UNDEFINED - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: None, # UNDEFINED - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: None, # UNDEFINED - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: None, # UNDEFINED - 0x008d: 0x00a8, # DIAERESIS - 0x008e: 0x02c7, # CARON - 0x008f: 0x00b8, # CEDILLA - 0x0090: None, # UNDEFINED - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: None, # UNDEFINED - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: None, # UNDEFINED - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: None, # UNDEFINED - 0x009d: 0x00af, # MACRON - 0x009e: 0x02db, # OGONEK - 
0x009f: None, # UNDEFINED - 0x00a1: None, # UNDEFINED - 0x00a5: None, # UNDEFINED - 0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00aa: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x00af: 0x00c6, # LATIN CAPITAL LETTER AE - 0x00b8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x00ba: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA - 0x00bf: 0x00e6, # LATIN SMALL LETTER AE - 0x00c0: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00c1: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK - 0x00c2: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON - 0x00c3: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x00c6: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00c7: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON - 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00ca: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x00cb: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x00cc: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x00cd: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x00ce: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON - 0x00cf: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x00d0: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00d2: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x00d4: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON - 0x00d8: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK - 0x00d9: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x00da: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x00db: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON - 0x00dd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00de: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00e0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00e1: 0x012f, # LATIN SMALL LETTER I WITH OGONEK - 0x00e2: 0x0101, # LATIN SMALL LETTER A WITH MACRON - 0x00e3: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x00e6: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00e7: 0x0113, # LATIN SMALL LETTER E WITH MACRON - 0x00e8: 0x010d, # LATIN 
SMALL LETTER C WITH CARON - 0x00ea: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00eb: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x00ec: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA - 0x00ed: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA - 0x00ee: 0x012b, # LATIN SMALL LETTER I WITH MACRON - 0x00ef: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA - 0x00f0: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00f2: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA - 0x00f4: 0x014d, # LATIN SMALL LETTER O WITH MACRON - 0x00f8: 0x0173, # LATIN SMALL LETTER U WITH OGONEK - 0x00f9: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x00fa: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x00fb: 0x016b, # LATIN SMALL LETTER U WITH MACRON - 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00ff: 0x02d9, # DOT ABOVE + 0x0080: 0x20ac, # EURO SIGN + 0x0081: None, # UNDEFINED + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: None, # UNDEFINED + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: None, # UNDEFINED + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: None, # UNDEFINED + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: None, # UNDEFINED + 0x008d: 0x00a8, # DIAERESIS + 0x008e: 0x02c7, # CARON + 0x008f: 0x00b8, # CEDILLA + 0x0090: None, # UNDEFINED + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: None, # UNDEFINED + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: None, # UNDEFINED + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: None, # UNDEFINED + 0x009d: 0x00af, # MACRON + 0x009e: 0x02db, # OGONEK + 
0x009f: None, # UNDEFINED + 0x00a1: None, # UNDEFINED + 0x00a5: None, # UNDEFINED + 0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00aa: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x00af: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00b8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00ba: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA + 0x00bf: 0x00e6, # LATIN SMALL LETTER AE + 0x00c0: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00c1: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK + 0x00c2: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON + 0x00c3: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x00c6: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00c7: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON + 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00ca: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x00cb: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x00cc: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x00cd: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x00ce: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON + 0x00cf: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x00d0: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00d2: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x00d4: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON + 0x00d8: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK + 0x00d9: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE + 0x00da: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x00db: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON + 0x00dd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00de: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00e0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00e1: 0x012f, # LATIN SMALL LETTER I WITH OGONEK + 0x00e2: 0x0101, # LATIN SMALL LETTER A WITH MACRON + 0x00e3: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x00e6: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00e7: 0x0113, # LATIN SMALL LETTER E WITH MACRON + 0x00e8: 0x010d, # LATIN 
SMALL LETTER C WITH CARON + 0x00ea: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x00eb: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x00ec: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA + 0x00ed: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA + 0x00ee: 0x012b, # LATIN SMALL LETTER I WITH MACRON + 0x00ef: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA + 0x00f0: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00f2: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA + 0x00f4: 0x014d, # LATIN SMALL LETTER O WITH MACRON + 0x00f8: 0x0173, # LATIN SMALL LETTER U WITH OGONEK + 0x00f9: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x00fa: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x00fb: 0x016b, # LATIN SMALL LETTER U WITH MACRON + 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00ff: 0x02d9, # DOT ABOVE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR 
+ u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u20ac' # 0x0080 -> EURO SIGN + u'\ufffe' # 0x0081 -> UNDEFINED + u'\u201a' # 0x0082 -> SINGLE LOW-9 QUOTATION MARK + u'\ufffe' # 0x0083 -> UNDEFINED + u'\u201e' # 0x0084 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x0085 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x0086 -> DAGGER + u'\u2021' # 0x0087 -> DOUBLE DAGGER + u'\ufffe' # 0x0088 -> UNDEFINED + u'\u2030' # 0x0089 -> PER MILLE SIGN + u'\ufffe' # 0x008a -> UNDEFINED + u'\u2039' # 0x008b -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x008c -> UNDEFINED + u'\xa8' # 0x008d -> DIAERESIS + u'\u02c7' # 0x008e -> CARON + u'\xb8' # 0x008f -> CEDILLA + u'\ufffe' # 0x0090 -> UNDEFINED + u'\u2018' # 0x0091 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x0092 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x0093 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x0094 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x0095 -> BULLET + u'\u2013' # 0x0096 -> EN DASH + u'\u2014' # 0x0097 -> EM DASH + u'\ufffe' # 0x0098 -> UNDEFINED + u'\u2122' # 0x0099 -> TRADE MARK SIGN + u'\ufffe' # 0x009a -> UNDEFINED + u'\u203a' # 0x009b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x009c -> UNDEFINED + u'\xaf' # 0x009d -> MACRON + u'\u02db' # 0x009e -> OGONEK + u'\ufffe' # 0x009f -> UNDEFINED + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\ufffe' # 0x00a1 -> UNDEFINED + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY 
SIGN + u'\ufffe' # 0x00a5 -> UNDEFINED + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xd8' # 0x00a8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u0156' # 0x00aa -> LATIN CAPITAL LETTER R WITH CEDILLA + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\xc6' # 0x00af -> LATIN CAPITAL LETTER AE + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\xf8' # 0x00b8 -> LATIN SMALL LETTER O WITH STROKE + u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE + u'\u0157' # 0x00ba -> LATIN SMALL LETTER R WITH CEDILLA + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0x00bc -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\xbe' # 0x00be -> VULGAR FRACTION THREE QUARTERS + u'\xe6' # 0x00bf -> LATIN SMALL LETTER AE + u'\u0104' # 0x00c0 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u012e' # 0x00c1 -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u0100' # 0x00c2 -> LATIN CAPITAL LETTER A WITH MACRON + u'\u0106' # 0x00c3 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x00c5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\u0118' # 0x00c6 -> LATIN CAPITAL LETTER E WITH OGONEK + u'\u0112' # 0x00c7 -> LATIN CAPITAL LETTER E WITH MACRON + u'\u010c' # 0x00c8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0179' # 0x00ca -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\u0116' # 0x00cb -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\u0122' # 0x00cc -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\u0136' # 0x00cd -> LATIN CAPITAL LETTER K WITH CEDILLA + 
u'\u012a' # 0x00ce -> LATIN CAPITAL LETTER I WITH MACRON + u'\u013b' # 0x00cf -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\u0160' # 0x00d0 -> LATIN CAPITAL LETTER S WITH CARON + u'\u0143' # 0x00d1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\u0145' # 0x00d2 -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\u014c' # 0x00d4 -> LATIN CAPITAL LETTER O WITH MACRON + u'\xd5' # 0x00d5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\u0172' # 0x00d8 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\u0141' # 0x00d9 -> LATIN CAPITAL LETTER L WITH STROKE + u'\u015a' # 0x00da -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u016a' # 0x00db -> LATIN CAPITAL LETTER U WITH MACRON + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u017b' # 0x00dd -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\u017d' # 0x00de -> LATIN CAPITAL LETTER Z WITH CARON + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S + u'\u0105' # 0x00e0 -> LATIN SMALL LETTER A WITH OGONEK + u'\u012f' # 0x00e1 -> LATIN SMALL LETTER I WITH OGONEK + u'\u0101' # 0x00e2 -> LATIN SMALL LETTER A WITH MACRON + u'\u0107' # 0x00e3 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0x00e5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\u0119' # 0x00e6 -> LATIN SMALL LETTER E WITH OGONEK + u'\u0113' # 0x00e7 -> LATIN SMALL LETTER E WITH MACRON + u'\u010d' # 0x00e8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u017a' # 0x00ea -> LATIN SMALL LETTER Z WITH ACUTE + u'\u0117' # 0x00eb -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\u0123' # 0x00ec -> LATIN SMALL LETTER G WITH CEDILLA + u'\u0137' # 0x00ed -> LATIN SMALL LETTER K WITH CEDILLA + u'\u012b' # 0x00ee -> LATIN SMALL LETTER I WITH MACRON + u'\u013c' # 0x00ef -> LATIN SMALL LETTER L WITH CEDILLA + u'\u0161' # 0x00f0 -> LATIN SMALL LETTER S WITH 
CARON + u'\u0144' # 0x00f1 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0146' # 0x00f2 -> LATIN SMALL LETTER N WITH CEDILLA + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\u014d' # 0x00f4 -> LATIN SMALL LETTER O WITH MACRON + u'\xf5' # 0x00f5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x00f7 -> DIVISION SIGN + u'\u0173' # 0x00f8 -> LATIN SMALL LETTER U WITH OGONEK + u'\u0142' # 0x00f9 -> LATIN SMALL LETTER L WITH STROKE + u'\u015b' # 0x00fa -> LATIN SMALL LETTER S WITH ACUTE + u'\u016b' # 0x00fb -> LATIN SMALL LETTER U WITH MACRON + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u017c' # 0x00fd -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u017e' # 0x00fe -> LATIN SMALL LETTER Z WITH CARON + u'\u02d9' # 0x00ff -> DOT ABOVE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # 
UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL 
LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x008d, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # 
SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00af: 0x009d, # MACRON + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b8: 0x008f, # CEDILLA + 0x00b9: 0x00b9, # SUPERSCRIPT ONE + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00be: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00af, # LATIN CAPITAL LETTER AE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00d8: 0x00a8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00bf, # LATIN SMALL LETTER AE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f5: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00f8: 0x00b8, # LATIN SMALL LETTER O WITH STROKE + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0100: 0x00c2, # LATIN CAPITAL LETTER A WITH MACRON + 0x0101: 0x00e2, # LATIN SMALL LETTER A WITH MACRON + 0x0104: 0x00c0, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0x00e0, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0x00c3, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0x00e3, # 
LATIN SMALL LETTER C WITH ACUTE + 0x010c: 0x00c8, # LATIN CAPITAL LETTER C WITH CARON + 0x010d: 0x00e8, # LATIN SMALL LETTER C WITH CARON + 0x0112: 0x00c7, # LATIN CAPITAL LETTER E WITH MACRON + 0x0113: 0x00e7, # LATIN SMALL LETTER E WITH MACRON + 0x0116: 0x00cb, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x0117: 0x00eb, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x0118: 0x00c6, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0x00e6, # LATIN SMALL LETTER E WITH OGONEK + 0x0122: 0x00cc, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0123: 0x00ec, # LATIN SMALL LETTER G WITH CEDILLA + 0x012a: 0x00ce, # LATIN CAPITAL LETTER I WITH MACRON + 0x012b: 0x00ee, # LATIN SMALL LETTER I WITH MACRON + 0x012e: 0x00c1, # LATIN CAPITAL LETTER I WITH OGONEK + 0x012f: 0x00e1, # LATIN SMALL LETTER I WITH OGONEK + 0x0136: 0x00cd, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x0137: 0x00ed, # LATIN SMALL LETTER K WITH CEDILLA + 0x013b: 0x00cf, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x013c: 0x00ef, # LATIN SMALL LETTER L WITH CEDILLA + 0x0141: 0x00d9, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0x00f9, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0x00d1, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0x00f1, # LATIN SMALL LETTER N WITH ACUTE + 0x0145: 0x00d2, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x0146: 0x00f2, # LATIN SMALL LETTER N WITH CEDILLA + 0x014c: 0x00d4, # LATIN CAPITAL LETTER O WITH MACRON + 0x014d: 0x00f4, # LATIN SMALL LETTER O WITH MACRON + 0x0156: 0x00aa, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x0157: 0x00ba, # LATIN SMALL LETTER R WITH CEDILLA + 0x015a: 0x00da, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015b: 0x00fa, # LATIN SMALL LETTER S WITH ACUTE + 0x0160: 0x00d0, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x00f0, # LATIN SMALL LETTER S WITH CARON + 0x016a: 0x00db, # LATIN CAPITAL LETTER U WITH MACRON + 0x016b: 0x00fb, # LATIN SMALL LETTER U WITH MACRON + 0x0172: 0x00d8, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0x00f8, # LATIN SMALL LETTER U WITH OGONEK + 0x0179: 
0x00ca, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017a: 0x00ea, # LATIN SMALL LETTER Z WITH ACUTE + 0x017b: 0x00dd, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017c: 0x00fd, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017d: 0x00de, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x00fe, # LATIN SMALL LETTER Z WITH CARON + 0x02c7: 0x008e, # CARON + 0x02d9: 0x00ff, # DOT ABOVE + 0x02db: 0x009e, # OGONEK + 0x2013: 0x0096, # EN DASH + 0x2014: 0x0097, # EM DASH + 0x2018: 0x0091, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x0092, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x0082, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x0093, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x0094, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x0084, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x0086, # DAGGER + 0x2021: 0x0087, # DOUBLE DAGGER + 0x2022: 0x0095, # BULLET + 0x2026: 0x0085, # HORIZONTAL ELLIPSIS + 0x2030: 0x0089, # PER MILLE SIGN + 0x2039: 0x008b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x009b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20ac: 0x0080, # EURO SIGN + 0x2122: 0x0099, # TRADE MARK SIGN +}
\ No newline at end of file diff --git a/Lib/encodings/cp1258.py b/Lib/encodings/cp1258.py index 03a6d3b..dfa63ba 100644 --- a/Lib/encodings/cp1258.py +++ b/Lib/encodings/cp1258.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP1258.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1258.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,54 +32,563 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: None, # UNDEFINED - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: None, # UNDEFINED - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x008d: None, # UNDEFINED - 0x008e: None, # UNDEFINED - 0x008f: None, # UNDEFINED - 0x0090: None, # UNDEFINED - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: 0x02dc, # SMALL TILDE - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: None, # UNDEFINED - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: 
0x0153, # LATIN SMALL LIGATURE OE - 0x009d: None, # UNDEFINED - 0x009e: None, # UNDEFINED - 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE - 0x00cc: 0x0300, # COMBINING GRAVE ACCENT - 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE - 0x00d2: 0x0309, # COMBINING HOOK ABOVE - 0x00d5: 0x01a0, # LATIN CAPITAL LETTER O WITH HORN - 0x00dd: 0x01af, # LATIN CAPITAL LETTER U WITH HORN - 0x00de: 0x0303, # COMBINING TILDE - 0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE - 0x00ec: 0x0301, # COMBINING ACUTE ACCENT - 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE - 0x00f2: 0x0323, # COMBINING DOT BELOW - 0x00f5: 0x01a1, # LATIN SMALL LETTER O WITH HORN - 0x00fd: 0x01b0, # LATIN SMALL LETTER U WITH HORN - 0x00fe: 0x20ab, # DONG SIGN + 0x0080: 0x20ac, # EURO SIGN + 0x0081: None, # UNDEFINED + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: None, # UNDEFINED + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE + 0x008d: None, # UNDEFINED + 0x008e: None, # UNDEFINED + 0x008f: None, # UNDEFINED + 0x0090: None, # UNDEFINED + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: 0x02dc, # SMALL TILDE + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: None, # UNDEFINED + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: 0x0153, # LATIN SMALL LIGATURE OE + 0x009d: None, # UNDEFINED + 0x009e: None, # UNDEFINED + 0x009f: 0x0178, # 
LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE + 0x00cc: 0x0300, # COMBINING GRAVE ACCENT + 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE + 0x00d2: 0x0309, # COMBINING HOOK ABOVE + 0x00d5: 0x01a0, # LATIN CAPITAL LETTER O WITH HORN + 0x00dd: 0x01af, # LATIN CAPITAL LETTER U WITH HORN + 0x00de: 0x0303, # COMBINING TILDE + 0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE + 0x00ec: 0x0301, # COMBINING ACUTE ACCENT + 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE + 0x00f2: 0x0323, # COMBINING DOT BELOW + 0x00f5: 0x01a1, # LATIN SMALL LETTER O WITH HORN + 0x00fd: 0x01b0, # LATIN SMALL LETTER U WITH HORN + 0x00fe: 0x20ab, # DONG SIGN }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' 
# 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 
0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u20ac' # 0x0080 -> EURO SIGN + u'\ufffe' # 0x0081 -> UNDEFINED + u'\u201a' # 0x0082 -> SINGLE LOW-9 QUOTATION MARK + u'\u0192' # 0x0083 -> LATIN SMALL LETTER F WITH HOOK + u'\u201e' # 0x0084 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x0085 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x0086 -> DAGGER + u'\u2021' # 0x0087 -> DOUBLE DAGGER + u'\u02c6' # 0x0088 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u2030' # 0x0089 -> PER MILLE SIGN + u'\ufffe' # 0x008a -> UNDEFINED + u'\u2039' # 0x008b -> SINGLE 
LEFT-POINTING ANGLE QUOTATION MARK + u'\u0152' # 0x008c -> LATIN CAPITAL LIGATURE OE + u'\ufffe' # 0x008d -> UNDEFINED + u'\ufffe' # 0x008e -> UNDEFINED + u'\ufffe' # 0x008f -> UNDEFINED + u'\ufffe' # 0x0090 -> UNDEFINED + u'\u2018' # 0x0091 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x0092 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x0093 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x0094 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x0095 -> BULLET + u'\u2013' # 0x0096 -> EN DASH + u'\u2014' # 0x0097 -> EM DASH + u'\u02dc' # 0x0098 -> SMALL TILDE + u'\u2122' # 0x0099 -> TRADE MARK SIGN + u'\ufffe' # 0x009a -> UNDEFINED + u'\u203a' # 0x009b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u0153' # 0x009c -> LATIN SMALL LIGATURE OE + u'\ufffe' # 0x009d -> UNDEFINED + u'\ufffe' # 0x009e -> UNDEFINED + u'\u0178' # 0x009f -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\xa1' # 0x00a1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\xa5' # 0x00a5 -> YEN SIGN + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\xaa' # 0x00aa -> FEMININE ORDINAL INDICATOR + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\xaf' # 0x00af -> MACRON + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\xb8' # 0x00b8 -> CEDILLA + u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE + u'\xba' # 0x00ba -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0x00bc -> VULGAR FRACTION ONE QUARTER + 
u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\xbe' # 0x00be -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0x00bf -> INVERTED QUESTION MARK + u'\xc0' # 0x00c0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u0102' # 0x00c3 -> LATIN CAPITAL LETTER A WITH BREVE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x00c5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0x00c6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0x00c7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0x00c8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x00ca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u0300' # 0x00cc -> COMBINING GRAVE ACCENT + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00cf -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u0110' # 0x00d0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\xd1' # 0x00d1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\u0309' # 0x00d2 -> COMBINING HOOK ABOVE + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u01a0' # 0x00d5 -> LATIN CAPITAL LETTER O WITH HORN + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\xd8' # 0x00d8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0x00d9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00db -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u01af' # 0x00dd -> LATIN CAPITAL LETTER U WITH HORN + u'\u0303' # 0x00de -> COMBINING TILDE + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0x00e0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 
0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\u0103' # 0x00e3 -> LATIN SMALL LETTER A WITH BREVE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0x00e5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0x00e6 -> LATIN SMALL LETTER AE + u'\xe7' # 0x00e7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0x00e8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x00ea -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u0301' # 0x00ec -> COMBINING ACUTE ACCENT + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x00ef -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u0111' # 0x00f0 -> LATIN SMALL LETTER D WITH STROKE + u'\xf1' # 0x00f1 -> LATIN SMALL LETTER N WITH TILDE + u'\u0323' # 0x00f2 -> COMBINING DOT BELOW + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u01a1' # 0x00f5 -> LATIN SMALL LETTER O WITH HORN + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x00f7 -> DIVISION SIGN + u'\xf8' # 0x00f8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0x00f9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0x00fb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u01b0' # 0x00fd -> LATIN SMALL LETTER U WITH HORN + u'\u20ab' # 0x00fe -> DONG SIGN + u'\xff' # 0x00ff -> LATIN SMALL LETTER Y WITH DIAERESIS +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 
0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 
0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 
0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a1: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a5: 0x00a5, # YEN SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00aa: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00af: 0x00af, # MACRON + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b8: 0x00b8, # CEDILLA + 0x00b9: 0x00b9, # SUPERSCRIPT ONE + 0x00ba: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00be: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00bf: 0x00bf, # INVERTED QUESTION MARK + 0x00c0: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00c6, # LATIN 
CAPITAL LETTER AE + 0x00c7: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d1: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00d8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00e6, # LATIN SMALL LETTER AE + 0x00e7: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x00f6, # 
LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00f8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0102: 0x00c3, # LATIN CAPITAL LETTER A WITH BREVE + 0x0103: 0x00e3, # LATIN SMALL LETTER A WITH BREVE + 0x0110: 0x00d0, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0x00f0, # LATIN SMALL LETTER D WITH STROKE + 0x0152: 0x008c, # LATIN CAPITAL LIGATURE OE + 0x0153: 0x009c, # LATIN SMALL LIGATURE OE + 0x0178: 0x009f, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0192: 0x0083, # LATIN SMALL LETTER F WITH HOOK + 0x01a0: 0x00d5, # LATIN CAPITAL LETTER O WITH HORN + 0x01a1: 0x00f5, # LATIN SMALL LETTER O WITH HORN + 0x01af: 0x00dd, # LATIN CAPITAL LETTER U WITH HORN + 0x01b0: 0x00fd, # LATIN SMALL LETTER U WITH HORN + 0x02c6: 0x0088, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02dc: 0x0098, # SMALL TILDE + 0x0300: 0x00cc, # COMBINING GRAVE ACCENT + 0x0301: 0x00ec, # COMBINING ACUTE ACCENT + 0x0303: 0x00de, # COMBINING TILDE + 0x0309: 0x00d2, # COMBINING HOOK ABOVE + 0x0323: 0x00f2, # COMBINING DOT BELOW + 0x2013: 0x0096, # EN DASH + 0x2014: 0x0097, # EM DASH + 0x2018: 0x0091, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x0092, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x0082, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x0093, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x0094, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x0084, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x0086, # DAGGER + 0x2021: 0x0087, # DOUBLE DAGGER + 0x2022: 0x0095, # BULLET + 0x2026: 0x0085, # HORIZONTAL ELLIPSIS + 0x2030: 0x0089, # PER MILLE SIGN + 0x2039: 0x008b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x009b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20ab: 0x00fe, # DONG SIGN + 0x20ac: 0x0080, # EURO SIGN + 
0x2122: 0x0099, # TRADE MARK SIGN +}
\ No newline at end of file diff --git a/Lib/encodings/cp424.py b/Lib/encodings/cp424.py index 6a53233..1f4480d 100644 --- a/Lib/encodings/cp424.py +++ b/Lib/encodings/cp424.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP424.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MISC/CP424.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,244 +32,724 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0004: 0x009c, # SELECT - 0x0005: 0x0009, # HORIZONTAL TABULATION - 0x0006: 0x0086, # REQUIRED NEW LINE - 0x0007: 0x007f, # DELETE - 0x0008: 0x0097, # GRAPHIC ESCAPE - 0x0009: 0x008d, # SUPERSCRIPT - 0x000a: 0x008e, # REPEAT - 0x0014: 0x009d, # RESTORE/ENABLE PRESENTATION - 0x0015: 0x0085, # NEW LINE - 0x0016: 0x0008, # BACKSPACE - 0x0017: 0x0087, # PROGRAM OPERATOR COMMUNICATION - 0x001a: 0x0092, # UNIT BACK SPACE - 0x001b: 0x008f, # CUSTOMER USE ONE - 0x0020: 0x0080, # DIGIT SELECT - 0x0021: 0x0081, # START OF SIGNIFICANCE - 0x0022: 0x0082, # FIELD SEPARATOR - 0x0023: 0x0083, # WORD UNDERSCORE - 0x0024: 0x0084, # BYPASS OR INHIBIT PRESENTATION - 0x0025: 0x000a, # LINE FEED - 0x0026: 0x0017, # END OF TRANSMISSION BLOCK - 0x0027: 0x001b, # ESCAPE - 0x0028: 0x0088, # SET ATTRIBUTE - 0x0029: 0x0089, # START FIELD EXTENDED - 0x002a: 0x008a, # SET MODE OR SWITCH - 0x002b: 0x008b, # CONTROL SEQUENCE PREFIX - 0x002c: 0x008c, # MODIFY FIELD ATTRIBUTE - 0x002d: 0x0005, # ENQUIRY - 0x002e: 0x0006, # ACKNOWLEDGE - 0x002f: 0x0007, # BELL - 0x0030: 0x0090, # <reserved> - 0x0031: 0x0091, # 
<reserved> - 0x0032: 0x0016, # SYNCHRONOUS IDLE - 0x0033: 0x0093, # INDEX RETURN - 0x0034: 0x0094, # PRESENTATION POSITION - 0x0035: 0x0095, # TRANSPARENT - 0x0036: 0x0096, # NUMERIC BACKSPACE - 0x0037: 0x0004, # END OF TRANSMISSION - 0x0038: 0x0098, # SUBSCRIPT - 0x0039: 0x0099, # INDENT TABULATION - 0x003a: 0x009a, # REVERSE FORM FEED - 0x003b: 0x009b, # CUSTOMER USE THREE - 0x003c: 0x0014, # DEVICE CONTROL FOUR - 0x003d: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x003e: 0x009e, # <reserved> - 0x003f: 0x001a, # SUBSTITUTE - 0x0040: 0x0020, # SPACE - 0x0041: 0x05d0, # HEBREW LETTER ALEF - 0x0042: 0x05d1, # HEBREW LETTER BET - 0x0043: 0x05d2, # HEBREW LETTER GIMEL - 0x0044: 0x05d3, # HEBREW LETTER DALET - 0x0045: 0x05d4, # HEBREW LETTER HE - 0x0046: 0x05d5, # HEBREW LETTER VAV - 0x0047: 0x05d6, # HEBREW LETTER ZAYIN - 0x0048: 0x05d7, # HEBREW LETTER HET - 0x0049: 0x05d8, # HEBREW LETTER TET - 0x004a: 0x00a2, # CENT SIGN - 0x004b: 0x002e, # FULL STOP - 0x004c: 0x003c, # LESS-THAN SIGN - 0x004d: 0x0028, # LEFT PARENTHESIS - 0x004e: 0x002b, # PLUS SIGN - 0x004f: 0x007c, # VERTICAL LINE - 0x0050: 0x0026, # AMPERSAND - 0x0051: 0x05d9, # HEBREW LETTER YOD - 0x0052: 0x05da, # HEBREW LETTER FINAL KAF - 0x0053: 0x05db, # HEBREW LETTER KAF - 0x0054: 0x05dc, # HEBREW LETTER LAMED - 0x0055: 0x05dd, # HEBREW LETTER FINAL MEM - 0x0056: 0x05de, # HEBREW LETTER MEM - 0x0057: 0x05df, # HEBREW LETTER FINAL NUN - 0x0058: 0x05e0, # HEBREW LETTER NUN - 0x0059: 0x05e1, # HEBREW LETTER SAMEKH - 0x005a: 0x0021, # EXCLAMATION MARK - 0x005b: 0x0024, # DOLLAR SIGN - 0x005c: 0x002a, # ASTERISK - 0x005d: 0x0029, # RIGHT PARENTHESIS - 0x005e: 0x003b, # SEMICOLON - 0x005f: 0x00ac, # NOT SIGN - 0x0060: 0x002d, # HYPHEN-MINUS - 0x0061: 0x002f, # SOLIDUS - 0x0062: 0x05e2, # HEBREW LETTER AYIN - 0x0063: 0x05e3, # HEBREW LETTER FINAL PE - 0x0064: 0x05e4, # HEBREW LETTER PE - 0x0065: 0x05e5, # HEBREW LETTER FINAL TSADI - 0x0066: 0x05e6, # HEBREW LETTER TSADI - 0x0067: 0x05e7, # HEBREW LETTER QOF - 0x0068: 
0x05e8, # HEBREW LETTER RESH - 0x0069: 0x05e9, # HEBREW LETTER SHIN - 0x006a: 0x00a6, # BROKEN BAR - 0x006b: 0x002c, # COMMA - 0x006c: 0x0025, # PERCENT SIGN - 0x006d: 0x005f, # LOW LINE - 0x006e: 0x003e, # GREATER-THAN SIGN - 0x006f: 0x003f, # QUESTION MARK - 0x0070: None, # UNDEFINED - 0x0071: 0x05ea, # HEBREW LETTER TAV - 0x0072: None, # UNDEFINED - 0x0073: None, # UNDEFINED - 0x0074: 0x00a0, # NO-BREAK SPACE - 0x0075: None, # UNDEFINED - 0x0076: None, # UNDEFINED - 0x0077: None, # UNDEFINED - 0x0078: 0x2017, # DOUBLE LOW LINE - 0x0079: 0x0060, # GRAVE ACCENT - 0x007a: 0x003a, # COLON - 0x007b: 0x0023, # NUMBER SIGN - 0x007c: 0x0040, # COMMERCIAL AT - 0x007d: 0x0027, # APOSTROPHE - 0x007e: 0x003d, # EQUALS SIGN - 0x007f: 0x0022, # QUOTATION MARK - 0x0080: None, # UNDEFINED - 0x0081: 0x0061, # LATIN SMALL LETTER A - 0x0082: 0x0062, # LATIN SMALL LETTER B - 0x0083: 0x0063, # LATIN SMALL LETTER C - 0x0084: 0x0064, # LATIN SMALL LETTER D - 0x0085: 0x0065, # LATIN SMALL LETTER E - 0x0086: 0x0066, # LATIN SMALL LETTER F - 0x0087: 0x0067, # LATIN SMALL LETTER G - 0x0088: 0x0068, # LATIN SMALL LETTER H - 0x0089: 0x0069, # LATIN SMALL LETTER I - 0x008a: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x008b: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x008c: None, # UNDEFINED - 0x008d: None, # UNDEFINED - 0x008e: None, # UNDEFINED - 0x008f: 0x00b1, # PLUS-MINUS SIGN - 0x0090: 0x00b0, # DEGREE SIGN - 0x0091: 0x006a, # LATIN SMALL LETTER J - 0x0092: 0x006b, # LATIN SMALL LETTER K - 0x0093: 0x006c, # LATIN SMALL LETTER L - 0x0094: 0x006d, # LATIN SMALL LETTER M - 0x0095: 0x006e, # LATIN SMALL LETTER N - 0x0096: 0x006f, # LATIN SMALL LETTER O - 0x0097: 0x0070, # LATIN SMALL LETTER P - 0x0098: 0x0071, # LATIN SMALL LETTER Q - 0x0099: 0x0072, # LATIN SMALL LETTER R - 0x009a: None, # UNDEFINED - 0x009b: None, # UNDEFINED - 0x009c: None, # UNDEFINED - 0x009d: 0x00b8, # CEDILLA - 0x009e: None, # UNDEFINED - 0x009f: 0x00a4, # CURRENCY SIGN - 0x00a0: 0x00b5, # 
MICRO SIGN - 0x00a1: 0x007e, # TILDE - 0x00a2: 0x0073, # LATIN SMALL LETTER S - 0x00a3: 0x0074, # LATIN SMALL LETTER T - 0x00a4: 0x0075, # LATIN SMALL LETTER U - 0x00a5: 0x0076, # LATIN SMALL LETTER V - 0x00a6: 0x0077, # LATIN SMALL LETTER W - 0x00a7: 0x0078, # LATIN SMALL LETTER X - 0x00a8: 0x0079, # LATIN SMALL LETTER Y - 0x00a9: 0x007a, # LATIN SMALL LETTER Z - 0x00aa: None, # UNDEFINED - 0x00ab: None, # UNDEFINED - 0x00ac: None, # UNDEFINED - 0x00ad: None, # UNDEFINED - 0x00ae: None, # UNDEFINED - 0x00af: 0x00ae, # REGISTERED SIGN - 0x00b0: 0x005e, # CIRCUMFLEX ACCENT - 0x00b1: 0x00a3, # POUND SIGN - 0x00b2: 0x00a5, # YEN SIGN - 0x00b3: 0x00b7, # MIDDLE DOT - 0x00b4: 0x00a9, # COPYRIGHT SIGN - 0x00b5: 0x00a7, # SECTION SIGN - 0x00b7: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00b8: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00b9: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00ba: 0x005b, # LEFT SQUARE BRACKET - 0x00bb: 0x005d, # RIGHT SQUARE BRACKET - 0x00bc: 0x00af, # MACRON - 0x00bd: 0x00a8, # DIAERESIS - 0x00be: 0x00b4, # ACUTE ACCENT - 0x00bf: 0x00d7, # MULTIPLICATION SIGN - 0x00c0: 0x007b, # LEFT CURLY BRACKET - 0x00c1: 0x0041, # LATIN CAPITAL LETTER A - 0x00c2: 0x0042, # LATIN CAPITAL LETTER B - 0x00c3: 0x0043, # LATIN CAPITAL LETTER C - 0x00c4: 0x0044, # LATIN CAPITAL LETTER D - 0x00c5: 0x0045, # LATIN CAPITAL LETTER E - 0x00c6: 0x0046, # LATIN CAPITAL LETTER F - 0x00c7: 0x0047, # LATIN CAPITAL LETTER G - 0x00c8: 0x0048, # LATIN CAPITAL LETTER H - 0x00c9: 0x0049, # LATIN CAPITAL LETTER I - 0x00ca: 0x00ad, # SOFT HYPHEN - 0x00cb: None, # UNDEFINED - 0x00cc: None, # UNDEFINED - 0x00cd: None, # UNDEFINED - 0x00ce: None, # UNDEFINED - 0x00cf: None, # UNDEFINED - 0x00d0: 0x007d, # RIGHT CURLY BRACKET - 0x00d1: 0x004a, # LATIN CAPITAL LETTER J - 0x00d2: 0x004b, # LATIN CAPITAL LETTER K - 0x00d3: 0x004c, # LATIN CAPITAL LETTER L - 0x00d4: 0x004d, # LATIN CAPITAL LETTER M - 0x00d5: 0x004e, # LATIN CAPITAL LETTER N - 0x00d6: 0x004f, # LATIN CAPITAL LETTER O - 0x00d7: 
0x0050, # LATIN CAPITAL LETTER P - 0x00d8: 0x0051, # LATIN CAPITAL LETTER Q - 0x00d9: 0x0052, # LATIN CAPITAL LETTER R - 0x00da: 0x00b9, # SUPERSCRIPT ONE - 0x00db: None, # UNDEFINED - 0x00dc: None, # UNDEFINED - 0x00dd: None, # UNDEFINED - 0x00de: None, # UNDEFINED - 0x00df: None, # UNDEFINED - 0x00e0: 0x005c, # REVERSE SOLIDUS - 0x00e1: 0x00f7, # DIVISION SIGN - 0x00e2: 0x0053, # LATIN CAPITAL LETTER S - 0x00e3: 0x0054, # LATIN CAPITAL LETTER T - 0x00e4: 0x0055, # LATIN CAPITAL LETTER U - 0x00e5: 0x0056, # LATIN CAPITAL LETTER V - 0x00e6: 0x0057, # LATIN CAPITAL LETTER W - 0x00e7: 0x0058, # LATIN CAPITAL LETTER X - 0x00e8: 0x0059, # LATIN CAPITAL LETTER Y - 0x00e9: 0x005a, # LATIN CAPITAL LETTER Z - 0x00ea: 0x00b2, # SUPERSCRIPT TWO - 0x00eb: None, # UNDEFINED - 0x00ec: None, # UNDEFINED - 0x00ed: None, # UNDEFINED - 0x00ee: None, # UNDEFINED - 0x00ef: None, # UNDEFINED - 0x00f0: 0x0030, # DIGIT ZERO - 0x00f1: 0x0031, # DIGIT ONE - 0x00f2: 0x0032, # DIGIT TWO - 0x00f3: 0x0033, # DIGIT THREE - 0x00f4: 0x0034, # DIGIT FOUR - 0x00f5: 0x0035, # DIGIT FIVE - 0x00f6: 0x0036, # DIGIT SIX - 0x00f7: 0x0037, # DIGIT SEVEN - 0x00f8: 0x0038, # DIGIT EIGHT - 0x00f9: 0x0039, # DIGIT NINE - 0x00fa: 0x00b3, # SUPERSCRIPT THREE - 0x00fb: None, # UNDEFINED - 0x00fc: None, # UNDEFINED - 0x00fd: None, # UNDEFINED - 0x00fe: None, # UNDEFINED - 0x00ff: 0x009f, # EIGHT ONES + 0x0004: 0x009c, # SELECT + 0x0005: 0x0009, # HORIZONTAL TABULATION + 0x0006: 0x0086, # REQUIRED NEW LINE + 0x0007: 0x007f, # DELETE + 0x0008: 0x0097, # GRAPHIC ESCAPE + 0x0009: 0x008d, # SUPERSCRIPT + 0x000a: 0x008e, # REPEAT + 0x0014: 0x009d, # RESTORE/ENABLE PRESENTATION + 0x0015: 0x0085, # NEW LINE + 0x0016: 0x0008, # BACKSPACE + 0x0017: 0x0087, # PROGRAM OPERATOR COMMUNICATION + 0x001a: 0x0092, # UNIT BACK SPACE + 0x001b: 0x008f, # CUSTOMER USE ONE + 0x0020: 0x0080, # DIGIT SELECT + 0x0021: 0x0081, # START OF SIGNIFICANCE + 0x0022: 0x0082, # FIELD SEPARATOR + 0x0023: 0x0083, # WORD UNDERSCORE + 0x0024: 0x0084, 
# BYPASS OR INHIBIT PRESENTATION + 0x0025: 0x000a, # LINE FEED + 0x0026: 0x0017, # END OF TRANSMISSION BLOCK + 0x0027: 0x001b, # ESCAPE + 0x0028: 0x0088, # SET ATTRIBUTE + 0x0029: 0x0089, # START FIELD EXTENDED + 0x002a: 0x008a, # SET MODE OR SWITCH + 0x002b: 0x008b, # CONTROL SEQUENCE PREFIX + 0x002c: 0x008c, # MODIFY FIELD ATTRIBUTE + 0x002d: 0x0005, # ENQUIRY + 0x002e: 0x0006, # ACKNOWLEDGE + 0x002f: 0x0007, # BELL + 0x0030: 0x0090, # <reserved> + 0x0031: 0x0091, # <reserved> + 0x0032: 0x0016, # SYNCHRONOUS IDLE + 0x0033: 0x0093, # INDEX RETURN + 0x0034: 0x0094, # PRESENTATION POSITION + 0x0035: 0x0095, # TRANSPARENT + 0x0036: 0x0096, # NUMERIC BACKSPACE + 0x0037: 0x0004, # END OF TRANSMISSION + 0x0038: 0x0098, # SUBSCRIPT + 0x0039: 0x0099, # INDENT TABULATION + 0x003a: 0x009a, # REVERSE FORM FEED + 0x003b: 0x009b, # CUSTOMER USE THREE + 0x003c: 0x0014, # DEVICE CONTROL FOUR + 0x003d: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x003e: 0x009e, # <reserved> + 0x003f: 0x001a, # SUBSTITUTE + 0x0040: 0x0020, # SPACE + 0x0041: 0x05d0, # HEBREW LETTER ALEF + 0x0042: 0x05d1, # HEBREW LETTER BET + 0x0043: 0x05d2, # HEBREW LETTER GIMEL + 0x0044: 0x05d3, # HEBREW LETTER DALET + 0x0045: 0x05d4, # HEBREW LETTER HE + 0x0046: 0x05d5, # HEBREW LETTER VAV + 0x0047: 0x05d6, # HEBREW LETTER ZAYIN + 0x0048: 0x05d7, # HEBREW LETTER HET + 0x0049: 0x05d8, # HEBREW LETTER TET + 0x004a: 0x00a2, # CENT SIGN + 0x004b: 0x002e, # FULL STOP + 0x004c: 0x003c, # LESS-THAN SIGN + 0x004d: 0x0028, # LEFT PARENTHESIS + 0x004e: 0x002b, # PLUS SIGN + 0x004f: 0x007c, # VERTICAL LINE + 0x0050: 0x0026, # AMPERSAND + 0x0051: 0x05d9, # HEBREW LETTER YOD + 0x0052: 0x05da, # HEBREW LETTER FINAL KAF + 0x0053: 0x05db, # HEBREW LETTER KAF + 0x0054: 0x05dc, # HEBREW LETTER LAMED + 0x0055: 0x05dd, # HEBREW LETTER FINAL MEM + 0x0056: 0x05de, # HEBREW LETTER MEM + 0x0057: 0x05df, # HEBREW LETTER FINAL NUN + 0x0058: 0x05e0, # HEBREW LETTER NUN + 0x0059: 0x05e1, # HEBREW LETTER SAMEKH + 0x005a: 0x0021, # EXCLAMATION MARK + 
0x005b: 0x0024, # DOLLAR SIGN + 0x005c: 0x002a, # ASTERISK + 0x005d: 0x0029, # RIGHT PARENTHESIS + 0x005e: 0x003b, # SEMICOLON + 0x005f: 0x00ac, # NOT SIGN + 0x0060: 0x002d, # HYPHEN-MINUS + 0x0061: 0x002f, # SOLIDUS + 0x0062: 0x05e2, # HEBREW LETTER AYIN + 0x0063: 0x05e3, # HEBREW LETTER FINAL PE + 0x0064: 0x05e4, # HEBREW LETTER PE + 0x0065: 0x05e5, # HEBREW LETTER FINAL TSADI + 0x0066: 0x05e6, # HEBREW LETTER TSADI + 0x0067: 0x05e7, # HEBREW LETTER QOF + 0x0068: 0x05e8, # HEBREW LETTER RESH + 0x0069: 0x05e9, # HEBREW LETTER SHIN + 0x006a: 0x00a6, # BROKEN BAR + 0x006b: 0x002c, # COMMA + 0x006c: 0x0025, # PERCENT SIGN + 0x006d: 0x005f, # LOW LINE + 0x006e: 0x003e, # GREATER-THAN SIGN + 0x006f: 0x003f, # QUESTION MARK + 0x0070: None, # UNDEFINED + 0x0071: 0x05ea, # HEBREW LETTER TAV + 0x0072: None, # UNDEFINED + 0x0073: None, # UNDEFINED + 0x0074: 0x00a0, # NO-BREAK SPACE + 0x0075: None, # UNDEFINED + 0x0076: None, # UNDEFINED + 0x0077: None, # UNDEFINED + 0x0078: 0x2017, # DOUBLE LOW LINE + 0x0079: 0x0060, # GRAVE ACCENT + 0x007a: 0x003a, # COLON + 0x007b: 0x0023, # NUMBER SIGN + 0x007c: 0x0040, # COMMERCIAL AT + 0x007d: 0x0027, # APOSTROPHE + 0x007e: 0x003d, # EQUALS SIGN + 0x007f: 0x0022, # QUOTATION MARK + 0x0080: None, # UNDEFINED + 0x0081: 0x0061, # LATIN SMALL LETTER A + 0x0082: 0x0062, # LATIN SMALL LETTER B + 0x0083: 0x0063, # LATIN SMALL LETTER C + 0x0084: 0x0064, # LATIN SMALL LETTER D + 0x0085: 0x0065, # LATIN SMALL LETTER E + 0x0086: 0x0066, # LATIN SMALL LETTER F + 0x0087: 0x0067, # LATIN SMALL LETTER G + 0x0088: 0x0068, # LATIN SMALL LETTER H + 0x0089: 0x0069, # LATIN SMALL LETTER I + 0x008a: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x008b: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x008c: None, # UNDEFINED + 0x008d: None, # UNDEFINED + 0x008e: None, # UNDEFINED + 0x008f: 0x00b1, # PLUS-MINUS SIGN + 0x0090: 0x00b0, # DEGREE SIGN + 0x0091: 0x006a, # LATIN SMALL LETTER J + 0x0092: 0x006b, # LATIN SMALL LETTER K + 0x0093: 
0x006c, # LATIN SMALL LETTER L + 0x0094: 0x006d, # LATIN SMALL LETTER M + 0x0095: 0x006e, # LATIN SMALL LETTER N + 0x0096: 0x006f, # LATIN SMALL LETTER O + 0x0097: 0x0070, # LATIN SMALL LETTER P + 0x0098: 0x0071, # LATIN SMALL LETTER Q + 0x0099: 0x0072, # LATIN SMALL LETTER R + 0x009a: None, # UNDEFINED + 0x009b: None, # UNDEFINED + 0x009c: None, # UNDEFINED + 0x009d: 0x00b8, # CEDILLA + 0x009e: None, # UNDEFINED + 0x009f: 0x00a4, # CURRENCY SIGN + 0x00a0: 0x00b5, # MICRO SIGN + 0x00a1: 0x007e, # TILDE + 0x00a2: 0x0073, # LATIN SMALL LETTER S + 0x00a3: 0x0074, # LATIN SMALL LETTER T + 0x00a4: 0x0075, # LATIN SMALL LETTER U + 0x00a5: 0x0076, # LATIN SMALL LETTER V + 0x00a6: 0x0077, # LATIN SMALL LETTER W + 0x00a7: 0x0078, # LATIN SMALL LETTER X + 0x00a8: 0x0079, # LATIN SMALL LETTER Y + 0x00a9: 0x007a, # LATIN SMALL LETTER Z + 0x00aa: None, # UNDEFINED + 0x00ab: None, # UNDEFINED + 0x00ac: None, # UNDEFINED + 0x00ad: None, # UNDEFINED + 0x00ae: None, # UNDEFINED + 0x00af: 0x00ae, # REGISTERED SIGN + 0x00b0: 0x005e, # CIRCUMFLEX ACCENT + 0x00b1: 0x00a3, # POUND SIGN + 0x00b2: 0x00a5, # YEN SIGN + 0x00b3: 0x00b7, # MIDDLE DOT + 0x00b4: 0x00a9, # COPYRIGHT SIGN + 0x00b5: 0x00a7, # SECTION SIGN + 0x00b7: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00b8: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00b9: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00ba: 0x005b, # LEFT SQUARE BRACKET + 0x00bb: 0x005d, # RIGHT SQUARE BRACKET + 0x00bc: 0x00af, # MACRON + 0x00bd: 0x00a8, # DIAERESIS + 0x00be: 0x00b4, # ACUTE ACCENT + 0x00bf: 0x00d7, # MULTIPLICATION SIGN + 0x00c0: 0x007b, # LEFT CURLY BRACKET + 0x00c1: 0x0041, # LATIN CAPITAL LETTER A + 0x00c2: 0x0042, # LATIN CAPITAL LETTER B + 0x00c3: 0x0043, # LATIN CAPITAL LETTER C + 0x00c4: 0x0044, # LATIN CAPITAL LETTER D + 0x00c5: 0x0045, # LATIN CAPITAL LETTER E + 0x00c6: 0x0046, # LATIN CAPITAL LETTER F + 0x00c7: 0x0047, # LATIN CAPITAL LETTER G + 0x00c8: 0x0048, # LATIN CAPITAL LETTER H + 0x00c9: 0x0049, # LATIN CAPITAL LETTER I + 0x00ca: 
0x00ad, # SOFT HYPHEN + 0x00cb: None, # UNDEFINED + 0x00cc: None, # UNDEFINED + 0x00cd: None, # UNDEFINED + 0x00ce: None, # UNDEFINED + 0x00cf: None, # UNDEFINED + 0x00d0: 0x007d, # RIGHT CURLY BRACKET + 0x00d1: 0x004a, # LATIN CAPITAL LETTER J + 0x00d2: 0x004b, # LATIN CAPITAL LETTER K + 0x00d3: 0x004c, # LATIN CAPITAL LETTER L + 0x00d4: 0x004d, # LATIN CAPITAL LETTER M + 0x00d5: 0x004e, # LATIN CAPITAL LETTER N + 0x00d6: 0x004f, # LATIN CAPITAL LETTER O + 0x00d7: 0x0050, # LATIN CAPITAL LETTER P + 0x00d8: 0x0051, # LATIN CAPITAL LETTER Q + 0x00d9: 0x0052, # LATIN CAPITAL LETTER R + 0x00da: 0x00b9, # SUPERSCRIPT ONE + 0x00db: None, # UNDEFINED + 0x00dc: None, # UNDEFINED + 0x00dd: None, # UNDEFINED + 0x00de: None, # UNDEFINED + 0x00df: None, # UNDEFINED + 0x00e0: 0x005c, # REVERSE SOLIDUS + 0x00e1: 0x00f7, # DIVISION SIGN + 0x00e2: 0x0053, # LATIN CAPITAL LETTER S + 0x00e3: 0x0054, # LATIN CAPITAL LETTER T + 0x00e4: 0x0055, # LATIN CAPITAL LETTER U + 0x00e5: 0x0056, # LATIN CAPITAL LETTER V + 0x00e6: 0x0057, # LATIN CAPITAL LETTER W + 0x00e7: 0x0058, # LATIN CAPITAL LETTER X + 0x00e8: 0x0059, # LATIN CAPITAL LETTER Y + 0x00e9: 0x005a, # LATIN CAPITAL LETTER Z + 0x00ea: 0x00b2, # SUPERSCRIPT TWO + 0x00eb: None, # UNDEFINED + 0x00ec: None, # UNDEFINED + 0x00ed: None, # UNDEFINED + 0x00ee: None, # UNDEFINED + 0x00ef: None, # UNDEFINED + 0x00f0: 0x0030, # DIGIT ZERO + 0x00f1: 0x0031, # DIGIT ONE + 0x00f2: 0x0032, # DIGIT TWO + 0x00f3: 0x0033, # DIGIT THREE + 0x00f4: 0x0034, # DIGIT FOUR + 0x00f5: 0x0035, # DIGIT FIVE + 0x00f6: 0x0036, # DIGIT SIX + 0x00f7: 0x0037, # DIGIT SEVEN + 0x00f8: 0x0038, # DIGIT EIGHT + 0x00f9: 0x0039, # DIGIT NINE + 0x00fa: 0x00b3, # SUPERSCRIPT THREE + 0x00fb: None, # UNDEFINED + 0x00fc: None, # UNDEFINED + 0x00fd: None, # UNDEFINED + 0x00fe: None, # UNDEFINED + 0x00ff: 0x009f, # EIGHT ONES }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + 
u'\x03' # 0x0003 -> END OF TEXT + u'\x9c' # 0x0004 -> SELECT + u'\t' # 0x0005 -> HORIZONTAL TABULATION + u'\x86' # 0x0006 -> REQUIRED NEW LINE + u'\x7f' # 0x0007 -> DELETE + u'\x97' # 0x0008 -> GRAPHIC ESCAPE + u'\x8d' # 0x0009 -> SUPERSCRIPT + u'\x8e' # 0x000a -> REPEAT + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x9d' # 0x0014 -> RESTORE/ENABLE PRESENTATION + u'\x85' # 0x0015 -> NEW LINE + u'\x08' # 0x0016 -> BACKSPACE + u'\x87' # 0x0017 -> PROGRAM OPERATOR COMMUNICATION + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x92' # 0x001a -> UNIT BACK SPACE + u'\x8f' # 0x001b -> CUSTOMER USE ONE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u'\x80' # 0x0020 -> DIGIT SELECT + u'\x81' # 0x0021 -> START OF SIGNIFICANCE + u'\x82' # 0x0022 -> FIELD SEPARATOR + u'\x83' # 0x0023 -> WORD UNDERSCORE + u'\x84' # 0x0024 -> BYPASS OR INHIBIT PRESENTATION + u'\n' # 0x0025 -> LINE FEED + u'\x17' # 0x0026 -> END OF TRANSMISSION BLOCK + u'\x1b' # 0x0027 -> ESCAPE + u'\x88' # 0x0028 -> SET ATTRIBUTE + u'\x89' # 0x0029 -> START FIELD EXTENDED + u'\x8a' # 0x002a -> SET MODE OR SWITCH + u'\x8b' # 0x002b -> CONTROL SEQUENCE PREFIX + u'\x8c' # 0x002c -> MODIFY FIELD ATTRIBUTE + u'\x05' # 0x002d -> ENQUIRY + u'\x06' # 0x002e -> ACKNOWLEDGE + u'\x07' # 0x002f -> BELL + u'\x90' # 0x0030 -> <reserved> + u'\x91' # 0x0031 -> <reserved> + u'\x16' # 0x0032 -> SYNCHRONOUS IDLE + u'\x93' # 0x0033 -> INDEX RETURN + u'\x94' # 0x0034 -> PRESENTATION POSITION + u'\x95' # 0x0035 -> TRANSPARENT + u'\x96' # 0x0036 -> NUMERIC BACKSPACE + u'\x04' # 0x0037 -> END OF TRANSMISSION + u'\x98' # 
0x0038 -> SUBSCRIPT + u'\x99' # 0x0039 -> INDENT TABULATION + u'\x9a' # 0x003a -> REVERSE FORM FEED + u'\x9b' # 0x003b -> CUSTOMER USE THREE + u'\x14' # 0x003c -> DEVICE CONTROL FOUR + u'\x15' # 0x003d -> NEGATIVE ACKNOWLEDGE + u'\x9e' # 0x003e -> <reserved> + u'\x1a' # 0x003f -> SUBSTITUTE + u' ' # 0x0040 -> SPACE + u'\u05d0' # 0x0041 -> HEBREW LETTER ALEF + u'\u05d1' # 0x0042 -> HEBREW LETTER BET + u'\u05d2' # 0x0043 -> HEBREW LETTER GIMEL + u'\u05d3' # 0x0044 -> HEBREW LETTER DALET + u'\u05d4' # 0x0045 -> HEBREW LETTER HE + u'\u05d5' # 0x0046 -> HEBREW LETTER VAV + u'\u05d6' # 0x0047 -> HEBREW LETTER ZAYIN + u'\u05d7' # 0x0048 -> HEBREW LETTER HET + u'\u05d8' # 0x0049 -> HEBREW LETTER TET + u'\xa2' # 0x004a -> CENT SIGN + u'.' # 0x004b -> FULL STOP + u'<' # 0x004c -> LESS-THAN SIGN + u'(' # 0x004d -> LEFT PARENTHESIS + u'+' # 0x004e -> PLUS SIGN + u'|' # 0x004f -> VERTICAL LINE + u'&' # 0x0050 -> AMPERSAND + u'\u05d9' # 0x0051 -> HEBREW LETTER YOD + u'\u05da' # 0x0052 -> HEBREW LETTER FINAL KAF + u'\u05db' # 0x0053 -> HEBREW LETTER KAF + u'\u05dc' # 0x0054 -> HEBREW LETTER LAMED + u'\u05dd' # 0x0055 -> HEBREW LETTER FINAL MEM + u'\u05de' # 0x0056 -> HEBREW LETTER MEM + u'\u05df' # 0x0057 -> HEBREW LETTER FINAL NUN + u'\u05e0' # 0x0058 -> HEBREW LETTER NUN + u'\u05e1' # 0x0059 -> HEBREW LETTER SAMEKH + u'!' 
# 0x005a -> EXCLAMATION MARK + u'$' # 0x005b -> DOLLAR SIGN + u'*' # 0x005c -> ASTERISK + u')' # 0x005d -> RIGHT PARENTHESIS + u';' # 0x005e -> SEMICOLON + u'\xac' # 0x005f -> NOT SIGN + u'-' # 0x0060 -> HYPHEN-MINUS + u'/' # 0x0061 -> SOLIDUS + u'\u05e2' # 0x0062 -> HEBREW LETTER AYIN + u'\u05e3' # 0x0063 -> HEBREW LETTER FINAL PE + u'\u05e4' # 0x0064 -> HEBREW LETTER PE + u'\u05e5' # 0x0065 -> HEBREW LETTER FINAL TSADI + u'\u05e6' # 0x0066 -> HEBREW LETTER TSADI + u'\u05e7' # 0x0067 -> HEBREW LETTER QOF + u'\u05e8' # 0x0068 -> HEBREW LETTER RESH + u'\u05e9' # 0x0069 -> HEBREW LETTER SHIN + u'\xa6' # 0x006a -> BROKEN BAR + u',' # 0x006b -> COMMA + u'%' # 0x006c -> PERCENT SIGN + u'_' # 0x006d -> LOW LINE + u'>' # 0x006e -> GREATER-THAN SIGN + u'?' # 0x006f -> QUESTION MARK + u'\ufffe' # 0x0070 -> UNDEFINED + u'\u05ea' # 0x0071 -> HEBREW LETTER TAV + u'\ufffe' # 0x0072 -> UNDEFINED + u'\ufffe' # 0x0073 -> UNDEFINED + u'\xa0' # 0x0074 -> NO-BREAK SPACE + u'\ufffe' # 0x0075 -> UNDEFINED + u'\ufffe' # 0x0076 -> UNDEFINED + u'\ufffe' # 0x0077 -> UNDEFINED + u'\u2017' # 0x0078 -> DOUBLE LOW LINE + u'`' # 0x0079 -> GRAVE ACCENT + u':' # 0x007a -> COLON + u'#' # 0x007b -> NUMBER SIGN + u'@' # 0x007c -> COMMERCIAL AT + u"'" # 0x007d -> APOSTROPHE + u'=' # 0x007e -> EQUALS SIGN + u'"' # 0x007f -> QUOTATION MARK + u'\ufffe' # 0x0080 -> UNDEFINED + u'a' # 0x0081 -> LATIN SMALL LETTER A + u'b' # 0x0082 -> LATIN SMALL LETTER B + u'c' # 0x0083 -> LATIN SMALL LETTER C + u'd' # 0x0084 -> LATIN SMALL LETTER D + u'e' # 0x0085 -> LATIN SMALL LETTER E + u'f' # 0x0086 -> LATIN SMALL LETTER F + u'g' # 0x0087 -> LATIN SMALL LETTER G + u'h' # 0x0088 -> LATIN SMALL LETTER H + u'i' # 0x0089 -> LATIN SMALL LETTER I + u'\xab' # 0x008a -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x008b -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\ufffe' # 0x008c -> UNDEFINED + u'\ufffe' # 0x008d -> UNDEFINED + u'\ufffe' # 0x008e -> UNDEFINED + u'\xb1' # 0x008f -> PLUS-MINUS SIGN + u'\xb0' # 
0x0090 -> DEGREE SIGN + u'j' # 0x0091 -> LATIN SMALL LETTER J + u'k' # 0x0092 -> LATIN SMALL LETTER K + u'l' # 0x0093 -> LATIN SMALL LETTER L + u'm' # 0x0094 -> LATIN SMALL LETTER M + u'n' # 0x0095 -> LATIN SMALL LETTER N + u'o' # 0x0096 -> LATIN SMALL LETTER O + u'p' # 0x0097 -> LATIN SMALL LETTER P + u'q' # 0x0098 -> LATIN SMALL LETTER Q + u'r' # 0x0099 -> LATIN SMALL LETTER R + u'\ufffe' # 0x009a -> UNDEFINED + u'\ufffe' # 0x009b -> UNDEFINED + u'\ufffe' # 0x009c -> UNDEFINED + u'\xb8' # 0x009d -> CEDILLA + u'\ufffe' # 0x009e -> UNDEFINED + u'\xa4' # 0x009f -> CURRENCY SIGN + u'\xb5' # 0x00a0 -> MICRO SIGN + u'~' # 0x00a1 -> TILDE + u's' # 0x00a2 -> LATIN SMALL LETTER S + u't' # 0x00a3 -> LATIN SMALL LETTER T + u'u' # 0x00a4 -> LATIN SMALL LETTER U + u'v' # 0x00a5 -> LATIN SMALL LETTER V + u'w' # 0x00a6 -> LATIN SMALL LETTER W + u'x' # 0x00a7 -> LATIN SMALL LETTER X + u'y' # 0x00a8 -> LATIN SMALL LETTER Y + u'z' # 0x00a9 -> LATIN SMALL LETTER Z + u'\ufffe' # 0x00aa -> UNDEFINED + u'\ufffe' # 0x00ab -> UNDEFINED + u'\ufffe' # 0x00ac -> UNDEFINED + u'\ufffe' # 0x00ad -> UNDEFINED + u'\ufffe' # 0x00ae -> UNDEFINED + u'\xae' # 0x00af -> REGISTERED SIGN + u'^' # 0x00b0 -> CIRCUMFLEX ACCENT + u'\xa3' # 0x00b1 -> POUND SIGN + u'\xa5' # 0x00b2 -> YEN SIGN + u'\xb7' # 0x00b3 -> MIDDLE DOT + u'\xa9' # 0x00b4 -> COPYRIGHT SIGN + u'\xa7' # 0x00b5 -> SECTION SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xbc' # 0x00b7 -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0x00b8 -> VULGAR FRACTION ONE HALF + u'\xbe' # 0x00b9 -> VULGAR FRACTION THREE QUARTERS + u'[' # 0x00ba -> LEFT SQUARE BRACKET + u']' # 0x00bb -> RIGHT SQUARE BRACKET + u'\xaf' # 0x00bc -> MACRON + u'\xa8' # 0x00bd -> DIAERESIS + u'\xb4' # 0x00be -> ACUTE ACCENT + u'\xd7' # 0x00bf -> MULTIPLICATION SIGN + u'{' # 0x00c0 -> LEFT CURLY BRACKET + u'A' # 0x00c1 -> LATIN CAPITAL LETTER A + u'B' # 0x00c2 -> LATIN CAPITAL LETTER B + u'C' # 0x00c3 -> LATIN CAPITAL LETTER C + u'D' # 0x00c4 -> LATIN CAPITAL LETTER D + u'E' # 0x00c5 
-> LATIN CAPITAL LETTER E + u'F' # 0x00c6 -> LATIN CAPITAL LETTER F + u'G' # 0x00c7 -> LATIN CAPITAL LETTER G + u'H' # 0x00c8 -> LATIN CAPITAL LETTER H + u'I' # 0x00c9 -> LATIN CAPITAL LETTER I + u'\xad' # 0x00ca -> SOFT HYPHEN + u'\ufffe' # 0x00cb -> UNDEFINED + u'\ufffe' # 0x00cc -> UNDEFINED + u'\ufffe' # 0x00cd -> UNDEFINED + u'\ufffe' # 0x00ce -> UNDEFINED + u'\ufffe' # 0x00cf -> UNDEFINED + u'}' # 0x00d0 -> RIGHT CURLY BRACKET + u'J' # 0x00d1 -> LATIN CAPITAL LETTER J + u'K' # 0x00d2 -> LATIN CAPITAL LETTER K + u'L' # 0x00d3 -> LATIN CAPITAL LETTER L + u'M' # 0x00d4 -> LATIN CAPITAL LETTER M + u'N' # 0x00d5 -> LATIN CAPITAL LETTER N + u'O' # 0x00d6 -> LATIN CAPITAL LETTER O + u'P' # 0x00d7 -> LATIN CAPITAL LETTER P + u'Q' # 0x00d8 -> LATIN CAPITAL LETTER Q + u'R' # 0x00d9 -> LATIN CAPITAL LETTER R + u'\xb9' # 0x00da -> SUPERSCRIPT ONE + u'\ufffe' # 0x00db -> UNDEFINED + u'\ufffe' # 0x00dc -> UNDEFINED + u'\ufffe' # 0x00dd -> UNDEFINED + u'\ufffe' # 0x00de -> UNDEFINED + u'\ufffe' # 0x00df -> UNDEFINED + u'\\' # 0x00e0 -> REVERSE SOLIDUS + u'\xf7' # 0x00e1 -> DIVISION SIGN + u'S' # 0x00e2 -> LATIN CAPITAL LETTER S + u'T' # 0x00e3 -> LATIN CAPITAL LETTER T + u'U' # 0x00e4 -> LATIN CAPITAL LETTER U + u'V' # 0x00e5 -> LATIN CAPITAL LETTER V + u'W' # 0x00e6 -> LATIN CAPITAL LETTER W + u'X' # 0x00e7 -> LATIN CAPITAL LETTER X + u'Y' # 0x00e8 -> LATIN CAPITAL LETTER Y + u'Z' # 0x00e9 -> LATIN CAPITAL LETTER Z + u'\xb2' # 0x00ea -> SUPERSCRIPT TWO + u'\ufffe' # 0x00eb -> UNDEFINED + u'\ufffe' # 0x00ec -> UNDEFINED + u'\ufffe' # 0x00ed -> UNDEFINED + u'\ufffe' # 0x00ee -> UNDEFINED + u'\ufffe' # 0x00ef -> UNDEFINED + u'0' # 0x00f0 -> DIGIT ZERO + u'1' # 0x00f1 -> DIGIT ONE + u'2' # 0x00f2 -> DIGIT TWO + u'3' # 0x00f3 -> DIGIT THREE + u'4' # 0x00f4 -> DIGIT FOUR + u'5' # 0x00f5 -> DIGIT FIVE + u'6' # 0x00f6 -> DIGIT SIX + u'7' # 0x00f7 -> DIGIT SEVEN + u'8' # 0x00f8 -> DIGIT EIGHT + u'9' # 0x00f9 -> DIGIT NINE + u'\xb3' # 0x00fa -> SUPERSCRIPT THREE + u'\ufffe' # 0x00fb 
-> UNDEFINED + u'\ufffe' # 0x00fc -> UNDEFINED + u'\ufffe' # 0x00fd -> UNDEFINED + u'\ufffe' # 0x00fe -> UNDEFINED + u'\x9f' # 0x00ff -> EIGHT ONES +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0037, # END OF TRANSMISSION + 0x0005: 0x002d, # ENQUIRY + 0x0006: 0x002e, # ACKNOWLEDGE + 0x0007: 0x002f, # BELL + 0x0008: 0x0016, # BACKSPACE + 0x0009: 0x0005, # HORIZONTAL TABULATION + 0x000a: 0x0025, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x003c, # DEVICE CONTROL FOUR + 0x0015: 0x003d, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0032, # SYNCHRONOUS IDLE + 0x0017: 0x0026, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x003f, # SUBSTITUTE + 0x001b: 0x0027, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0040, # SPACE + 0x0021: 0x005a, # EXCLAMATION MARK + 0x0022: 0x007f, # QUOTATION MARK + 0x0023: 0x007b, # NUMBER SIGN + 0x0024: 0x005b, # DOLLAR SIGN + 0x0025: 0x006c, # PERCENT SIGN + 0x0026: 0x0050, # AMPERSAND + 0x0027: 0x007d, # APOSTROPHE + 0x0028: 0x004d, # LEFT PARENTHESIS + 0x0029: 0x005d, # RIGHT PARENTHESIS + 0x002a: 0x005c, # ASTERISK + 0x002b: 0x004e, # PLUS SIGN + 0x002c: 0x006b, # COMMA + 0x002d: 0x0060, # HYPHEN-MINUS + 0x002e: 0x004b, # FULL STOP + 0x002f: 0x0061, # SOLIDUS + 0x0030: 0x00f0, # DIGIT ZERO + 0x0031: 0x00f1, # DIGIT ONE + 0x0032: 0x00f2, # DIGIT TWO + 0x0033: 0x00f3, # DIGIT THREE + 0x0034: 0x00f4, # 
DIGIT FOUR + 0x0035: 0x00f5, # DIGIT FIVE + 0x0036: 0x00f6, # DIGIT SIX + 0x0037: 0x00f7, # DIGIT SEVEN + 0x0038: 0x00f8, # DIGIT EIGHT + 0x0039: 0x00f9, # DIGIT NINE + 0x003a: 0x007a, # COLON + 0x003b: 0x005e, # SEMICOLON + 0x003c: 0x004c, # LESS-THAN SIGN + 0x003d: 0x007e, # EQUALS SIGN + 0x003e: 0x006e, # GREATER-THAN SIGN + 0x003f: 0x006f, # QUESTION MARK + 0x0040: 0x007c, # COMMERCIAL AT + 0x0041: 0x00c1, # LATIN CAPITAL LETTER A + 0x0042: 0x00c2, # LATIN CAPITAL LETTER B + 0x0043: 0x00c3, # LATIN CAPITAL LETTER C + 0x0044: 0x00c4, # LATIN CAPITAL LETTER D + 0x0045: 0x00c5, # LATIN CAPITAL LETTER E + 0x0046: 0x00c6, # LATIN CAPITAL LETTER F + 0x0047: 0x00c7, # LATIN CAPITAL LETTER G + 0x0048: 0x00c8, # LATIN CAPITAL LETTER H + 0x0049: 0x00c9, # LATIN CAPITAL LETTER I + 0x004a: 0x00d1, # LATIN CAPITAL LETTER J + 0x004b: 0x00d2, # LATIN CAPITAL LETTER K + 0x004c: 0x00d3, # LATIN CAPITAL LETTER L + 0x004d: 0x00d4, # LATIN CAPITAL LETTER M + 0x004e: 0x00d5, # LATIN CAPITAL LETTER N + 0x004f: 0x00d6, # LATIN CAPITAL LETTER O + 0x0050: 0x00d7, # LATIN CAPITAL LETTER P + 0x0051: 0x00d8, # LATIN CAPITAL LETTER Q + 0x0052: 0x00d9, # LATIN CAPITAL LETTER R + 0x0053: 0x00e2, # LATIN CAPITAL LETTER S + 0x0054: 0x00e3, # LATIN CAPITAL LETTER T + 0x0055: 0x00e4, # LATIN CAPITAL LETTER U + 0x0056: 0x00e5, # LATIN CAPITAL LETTER V + 0x0057: 0x00e6, # LATIN CAPITAL LETTER W + 0x0058: 0x00e7, # LATIN CAPITAL LETTER X + 0x0059: 0x00e8, # LATIN CAPITAL LETTER Y + 0x005a: 0x00e9, # LATIN CAPITAL LETTER Z + 0x005b: 0x00ba, # LEFT SQUARE BRACKET + 0x005c: 0x00e0, # REVERSE SOLIDUS + 0x005d: 0x00bb, # RIGHT SQUARE BRACKET + 0x005e: 0x00b0, # CIRCUMFLEX ACCENT + 0x005f: 0x006d, # LOW LINE + 0x0060: 0x0079, # GRAVE ACCENT + 0x0061: 0x0081, # LATIN SMALL LETTER A + 0x0062: 0x0082, # LATIN SMALL LETTER B + 0x0063: 0x0083, # LATIN SMALL LETTER C + 0x0064: 0x0084, # LATIN SMALL LETTER D + 0x0065: 0x0085, # LATIN SMALL LETTER E + 0x0066: 0x0086, # LATIN SMALL LETTER F + 0x0067: 0x0087, # 
LATIN SMALL LETTER G + 0x0068: 0x0088, # LATIN SMALL LETTER H + 0x0069: 0x0089, # LATIN SMALL LETTER I + 0x006a: 0x0091, # LATIN SMALL LETTER J + 0x006b: 0x0092, # LATIN SMALL LETTER K + 0x006c: 0x0093, # LATIN SMALL LETTER L + 0x006d: 0x0094, # LATIN SMALL LETTER M + 0x006e: 0x0095, # LATIN SMALL LETTER N + 0x006f: 0x0096, # LATIN SMALL LETTER O + 0x0070: 0x0097, # LATIN SMALL LETTER P + 0x0071: 0x0098, # LATIN SMALL LETTER Q + 0x0072: 0x0099, # LATIN SMALL LETTER R + 0x0073: 0x00a2, # LATIN SMALL LETTER S + 0x0074: 0x00a3, # LATIN SMALL LETTER T + 0x0075: 0x00a4, # LATIN SMALL LETTER U + 0x0076: 0x00a5, # LATIN SMALL LETTER V + 0x0077: 0x00a6, # LATIN SMALL LETTER W + 0x0078: 0x00a7, # LATIN SMALL LETTER X + 0x0079: 0x00a8, # LATIN SMALL LETTER Y + 0x007a: 0x00a9, # LATIN SMALL LETTER Z + 0x007b: 0x00c0, # LEFT CURLY BRACKET + 0x007c: 0x004f, # VERTICAL LINE + 0x007d: 0x00d0, # RIGHT CURLY BRACKET + 0x007e: 0x00a1, # TILDE + 0x007f: 0x0007, # DELETE + 0x0080: 0x0020, # DIGIT SELECT + 0x0081: 0x0021, # START OF SIGNIFICANCE + 0x0082: 0x0022, # FIELD SEPARATOR + 0x0083: 0x0023, # WORD UNDERSCORE + 0x0084: 0x0024, # BYPASS OR INHIBIT PRESENTATION + 0x0085: 0x0015, # NEW LINE + 0x0086: 0x0006, # REQUIRED NEW LINE + 0x0087: 0x0017, # PROGRAM OPERATOR COMMUNICATION + 0x0088: 0x0028, # SET ATTRIBUTE + 0x0089: 0x0029, # START FIELD EXTENDED + 0x008a: 0x002a, # SET MODE OR SWITCH + 0x008b: 0x002b, # CONTROL SEQUENCE PREFIX + 0x008c: 0x002c, # MODIFY FIELD ATTRIBUTE + 0x008d: 0x0009, # SUPERSCRIPT + 0x008e: 0x000a, # REPEAT + 0x008f: 0x001b, # CUSTOMER USE ONE + 0x0090: 0x0030, # <reserved> + 0x0091: 0x0031, # <reserved> + 0x0092: 0x001a, # UNIT BACK SPACE + 0x0093: 0x0033, # INDEX RETURN + 0x0094: 0x0034, # PRESENTATION POSITION + 0x0095: 0x0035, # TRANSPARENT + 0x0096: 0x0036, # NUMERIC BACKSPACE + 0x0097: 0x0008, # GRAPHIC ESCAPE + 0x0098: 0x0038, # SUBSCRIPT + 0x0099: 0x0039, # INDENT TABULATION + 0x009a: 0x003a, # REVERSE FORM FEED + 0x009b: 0x003b, # CUSTOMER USE 
THREE + 0x009c: 0x0004, # SELECT + 0x009d: 0x0014, # RESTORE/ENABLE PRESENTATION + 0x009e: 0x003e, # <reserved> + 0x009f: 0x00ff, # EIGHT ONES + 0x00a0: 0x0074, # NO-BREAK SPACE + 0x00a2: 0x004a, # CENT SIGN + 0x00a3: 0x00b1, # POUND SIGN + 0x00a4: 0x009f, # CURRENCY SIGN + 0x00a5: 0x00b2, # YEN SIGN + 0x00a6: 0x006a, # BROKEN BAR + 0x00a7: 0x00b5, # SECTION SIGN + 0x00a8: 0x00bd, # DIAERESIS + 0x00a9: 0x00b4, # COPYRIGHT SIGN + 0x00ab: 0x008a, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x005f, # NOT SIGN + 0x00ad: 0x00ca, # SOFT HYPHEN + 0x00ae: 0x00af, # REGISTERED SIGN + 0x00af: 0x00bc, # MACRON + 0x00b0: 0x0090, # DEGREE SIGN + 0x00b1: 0x008f, # PLUS-MINUS SIGN + 0x00b2: 0x00ea, # SUPERSCRIPT TWO + 0x00b3: 0x00fa, # SUPERSCRIPT THREE + 0x00b4: 0x00be, # ACUTE ACCENT + 0x00b5: 0x00a0, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b3, # MIDDLE DOT + 0x00b8: 0x009d, # CEDILLA + 0x00b9: 0x00da, # SUPERSCRIPT ONE + 0x00bb: 0x008b, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00b7, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00b8, # VULGAR FRACTION ONE HALF + 0x00be: 0x00b9, # VULGAR FRACTION THREE QUARTERS + 0x00d7: 0x00bf, # MULTIPLICATION SIGN + 0x00f7: 0x00e1, # DIVISION SIGN + 0x05d0: 0x0041, # HEBREW LETTER ALEF + 0x05d1: 0x0042, # HEBREW LETTER BET + 0x05d2: 0x0043, # HEBREW LETTER GIMEL + 0x05d3: 0x0044, # HEBREW LETTER DALET + 0x05d4: 0x0045, # HEBREW LETTER HE + 0x05d5: 0x0046, # HEBREW LETTER VAV + 0x05d6: 0x0047, # HEBREW LETTER ZAYIN + 0x05d7: 0x0048, # HEBREW LETTER HET + 0x05d8: 0x0049, # HEBREW LETTER TET + 0x05d9: 0x0051, # HEBREW LETTER YOD + 0x05da: 0x0052, # HEBREW LETTER FINAL KAF + 0x05db: 0x0053, # HEBREW LETTER KAF + 0x05dc: 0x0054, # HEBREW LETTER LAMED + 0x05dd: 0x0055, # HEBREW LETTER FINAL MEM + 0x05de: 0x0056, # HEBREW LETTER MEM + 0x05df: 0x0057, # HEBREW LETTER FINAL NUN + 0x05e0: 0x0058, # HEBREW LETTER NUN + 0x05e1: 0x0059, # HEBREW LETTER SAMEKH + 0x05e2: 0x0062, # HEBREW LETTER AYIN + 0x05e3: 
0x0063, # HEBREW LETTER FINAL PE + 0x05e4: 0x0064, # HEBREW LETTER PE + 0x05e5: 0x0065, # HEBREW LETTER FINAL TSADI + 0x05e6: 0x0066, # HEBREW LETTER TSADI + 0x05e7: 0x0067, # HEBREW LETTER QOF + 0x05e8: 0x0068, # HEBREW LETTER RESH + 0x05e9: 0x0069, # HEBREW LETTER SHIN + 0x05ea: 0x0071, # HEBREW LETTER TAV + 0x2017: 0x0078, # DOUBLE LOW LINE +}
\ No newline at end of file diff --git a/Lib/encodings/cp437.py b/Lib/encodings/cp437.py index bfe218f..ee3fca3 100644 --- a/Lib/encodings/cp437.py +++ b/Lib/encodings/cp437.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP437.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP437.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 
0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00a2, # CENT SIGN - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00a5, # YEN SIGN - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND 
LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # 
GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x00ec, # LATIN 
SMALL LETTER I WITH GRAVE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00a2, # CENT SIGN + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00a5, # YEN SIGN + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL 
DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # 
BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + 
u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\xd6' # 
0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xa2' # 0x009b -> CENT SIGN + u'\xa3' # 0x009c -> POUND SIGN + u'\xa5' # 0x009d -> YEN SIGN + u'\u20a7' # 0x009e -> PESETA SIGN + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\u2310' # 0x00a9 -> REVERSED NOT SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT 
DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA + 
u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA + u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI + u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA + u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU + u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI + u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA + u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA + u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA + u'\u221e' # 0x00ec -> INFINITY + u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI + u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON + u'\u2229' # 0x00ef -> INTERSECTION + u'\u2261' # 0x00f0 -> IDENTICAL TO + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL + u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 
0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN 
CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 
0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a2: 0x009b, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a5: 0x009d, # YEN SIGN + 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x008d, # LATIN SMALL LETTER I 
WITH GRAVE + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA + 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA + 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA + 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI + 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA + 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA + 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON + 0x03c0: 0x00e3, # GREEK SMALL LETTER PI + 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU + 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x20a7: 0x009e, # PESETA SIGN + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x221e: 0x00ec, # INFINITY + 0x2229: 0x00ef, # INTERSECTION + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2261: 0x00f0, # IDENTICAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2310: 0x00a9, # REVERSED NOT SIGN + 0x2320: 0x00f4, # TOP HALF INTEGRAL + 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 
0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL 
+ 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
\ No newline at end of file diff --git a/Lib/encodings/cp737.py b/Lib/encodings/cp737.py index a4729b1..e2ade6f 100644 --- a/Lib/encodings/cp737.py +++ b/Lib/encodings/cp737.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP737.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP737.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA - 0x0081: 0x0392, # GREEK CAPITAL LETTER BETA - 0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x0083: 0x0394, # GREEK CAPITAL LETTER DELTA - 0x0084: 0x0395, # GREEK CAPITAL LETTER EPSILON - 0x0085: 0x0396, # GREEK CAPITAL LETTER ZETA - 0x0086: 0x0397, # GREEK CAPITAL LETTER ETA - 0x0087: 0x0398, # GREEK CAPITAL LETTER THETA - 0x0088: 0x0399, # GREEK CAPITAL LETTER IOTA - 0x0089: 0x039a, # GREEK CAPITAL LETTER KAPPA - 0x008a: 0x039b, # GREEK CAPITAL LETTER LAMDA - 0x008b: 0x039c, # GREEK CAPITAL LETTER MU - 0x008c: 0x039d, # GREEK CAPITAL LETTER NU - 0x008d: 0x039e, # GREEK CAPITAL LETTER XI - 0x008e: 0x039f, # GREEK CAPITAL LETTER OMICRON - 0x008f: 0x03a0, # GREEK CAPITAL LETTER PI - 0x0090: 0x03a1, # GREEK CAPITAL LETTER RHO - 0x0091: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x0092: 0x03a4, # GREEK CAPITAL LETTER TAU - 0x0093: 0x03a5, # GREEK CAPITAL LETTER UPSILON - 0x0094: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x0095: 0x03a7, # GREEK CAPITAL LETTER CHI - 0x0096: 0x03a8, # GREEK CAPITAL LETTER PSI - 0x0097: 0x03a9, # GREEK CAPITAL 
LETTER OMEGA - 0x0098: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x0099: 0x03b2, # GREEK SMALL LETTER BETA - 0x009a: 0x03b3, # GREEK SMALL LETTER GAMMA - 0x009b: 0x03b4, # GREEK SMALL LETTER DELTA - 0x009c: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x009d: 0x03b6, # GREEK SMALL LETTER ZETA - 0x009e: 0x03b7, # GREEK SMALL LETTER ETA - 0x009f: 0x03b8, # GREEK SMALL LETTER THETA - 0x00a0: 0x03b9, # GREEK SMALL LETTER IOTA - 0x00a1: 0x03ba, # GREEK SMALL LETTER KAPPA - 0x00a2: 0x03bb, # GREEK SMALL LETTER LAMDA - 0x00a3: 0x03bc, # GREEK SMALL LETTER MU - 0x00a4: 0x03bd, # GREEK SMALL LETTER NU - 0x00a5: 0x03be, # GREEK SMALL LETTER XI - 0x00a6: 0x03bf, # GREEK SMALL LETTER OMICRON - 0x00a7: 0x03c0, # GREEK SMALL LETTER PI - 0x00a8: 0x03c1, # GREEK SMALL LETTER RHO - 0x00a9: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00aa: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA - 0x00ab: 0x03c4, # GREEK SMALL LETTER TAU - 0x00ac: 0x03c5, # GREEK SMALL LETTER UPSILON - 0x00ad: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ae: 0x03c7, # GREEK SMALL LETTER CHI - 0x00af: 0x03c8, # GREEK SMALL LETTER PSI - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS 
LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03c9, # GREEK SMALL LETTER OMEGA - 0x00e1: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x00e2: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x00e3: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS - 0x00e4: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 
0x00e5: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS - 0x00e6: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x00e7: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x00e8: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x00e9: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x00ea: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x00eb: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x00ec: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x00ed: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x00ee: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x00ef: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x00f0: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x00f5: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA + 0x0081: 0x0392, # GREEK CAPITAL LETTER BETA + 0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x0083: 0x0394, # GREEK CAPITAL LETTER DELTA + 0x0084: 0x0395, # GREEK CAPITAL LETTER EPSILON + 0x0085: 0x0396, # GREEK CAPITAL LETTER ZETA + 0x0086: 0x0397, # GREEK CAPITAL LETTER ETA + 0x0087: 0x0398, # GREEK CAPITAL LETTER THETA + 0x0088: 0x0399, # GREEK CAPITAL LETTER IOTA + 0x0089: 0x039a, # GREEK CAPITAL LETTER KAPPA + 0x008a: 0x039b, # GREEK CAPITAL LETTER LAMDA + 0x008b: 0x039c, # GREEK CAPITAL LETTER MU + 0x008c: 0x039d, # GREEK CAPITAL LETTER NU + 0x008d: 0x039e, # GREEK CAPITAL LETTER XI + 0x008e: 0x039f, # GREEK CAPITAL LETTER OMICRON + 
0x008f: 0x03a0, # GREEK CAPITAL LETTER PI + 0x0090: 0x03a1, # GREEK CAPITAL LETTER RHO + 0x0091: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x0092: 0x03a4, # GREEK CAPITAL LETTER TAU + 0x0093: 0x03a5, # GREEK CAPITAL LETTER UPSILON + 0x0094: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x0095: 0x03a7, # GREEK CAPITAL LETTER CHI + 0x0096: 0x03a8, # GREEK CAPITAL LETTER PSI + 0x0097: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x0098: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x0099: 0x03b2, # GREEK SMALL LETTER BETA + 0x009a: 0x03b3, # GREEK SMALL LETTER GAMMA + 0x009b: 0x03b4, # GREEK SMALL LETTER DELTA + 0x009c: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x009d: 0x03b6, # GREEK SMALL LETTER ZETA + 0x009e: 0x03b7, # GREEK SMALL LETTER ETA + 0x009f: 0x03b8, # GREEK SMALL LETTER THETA + 0x00a0: 0x03b9, # GREEK SMALL LETTER IOTA + 0x00a1: 0x03ba, # GREEK SMALL LETTER KAPPA + 0x00a2: 0x03bb, # GREEK SMALL LETTER LAMDA + 0x00a3: 0x03bc, # GREEK SMALL LETTER MU + 0x00a4: 0x03bd, # GREEK SMALL LETTER NU + 0x00a5: 0x03be, # GREEK SMALL LETTER XI + 0x00a6: 0x03bf, # GREEK SMALL LETTER OMICRON + 0x00a7: 0x03c0, # GREEK SMALL LETTER PI + 0x00a8: 0x03c1, # GREEK SMALL LETTER RHO + 0x00a9: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00aa: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA + 0x00ab: 0x03c4, # GREEK SMALL LETTER TAU + 0x00ac: 0x03c5, # GREEK SMALL LETTER UPSILON + 0x00ad: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ae: 0x03c7, # GREEK SMALL LETTER CHI + 0x00af: 0x03c8, # GREEK SMALL LETTER PSI + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 
0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER 
HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03c9, # GREEK SMALL LETTER OMEGA + 0x00e1: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x00e2: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x00e3: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS + 0x00e4: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x00e5: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS + 0x00e6: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x00e7: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x00e8: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x00e9: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x00ea: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x00eb: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x00ec: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x00ed: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x00ee: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x00ef: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x00f0: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x00f5: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 
0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u0391' # 0x0080 -> GREEK CAPITAL LETTER ALPHA + u'\u0392' # 0x0081 -> GREEK CAPITAL LETTER BETA + u'\u0393' # 0x0082 -> GREEK CAPITAL LETTER GAMMA + u'\u0394' # 0x0083 -> GREEK CAPITAL LETTER DELTA + u'\u0395' # 0x0084 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0x0085 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0x0086 -> GREEK CAPITAL LETTER ETA + u'\u0398' # 0x0087 -> GREEK CAPITAL LETTER THETA + u'\u0399' # 0x0088 -> GREEK CAPITAL LETTER IOTA + u'\u039a' # 0x0089 -> GREEK CAPITAL LETTER KAPPA + u'\u039b' # 0x008a -> GREEK CAPITAL LETTER LAMDA + u'\u039c' # 0x008b -> GREEK CAPITAL LETTER MU + u'\u039d' # 0x008c -> GREEK CAPITAL LETTER NU + u'\u039e' # 0x008d -> GREEK CAPITAL LETTER XI + u'\u039f' # 0x008e -> GREEK CAPITAL LETTER OMICRON + u'\u03a0' # 0x008f -> GREEK CAPITAL LETTER PI + u'\u03a1' # 0x0090 -> GREEK CAPITAL LETTER RHO + u'\u03a3' # 0x0091 -> GREEK CAPITAL LETTER SIGMA + u'\u03a4' # 0x0092 -> GREEK CAPITAL LETTER TAU + u'\u03a5' # 0x0093 -> GREEK CAPITAL LETTER UPSILON + u'\u03a6' # 0x0094 -> GREEK CAPITAL LETTER PHI + u'\u03a7' # 0x0095 -> GREEK CAPITAL LETTER CHI + u'\u03a8' # 0x0096 -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0x0097 -> GREEK CAPITAL LETTER OMEGA + u'\u03b1' # 0x0098 -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 0x0099 -> GREEK SMALL LETTER BETA + u'\u03b3' # 0x009a -> GREEK SMALL LETTER GAMMA + u'\u03b4' # 0x009b -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0x009c -> 
GREEK SMALL LETTER EPSILON + u'\u03b6' # 0x009d -> GREEK SMALL LETTER ZETA + u'\u03b7' # 0x009e -> GREEK SMALL LETTER ETA + u'\u03b8' # 0x009f -> GREEK SMALL LETTER THETA + u'\u03b9' # 0x00a0 -> GREEK SMALL LETTER IOTA + u'\u03ba' # 0x00a1 -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0x00a2 -> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0x00a3 -> GREEK SMALL LETTER MU + u'\u03bd' # 0x00a4 -> GREEK SMALL LETTER NU + u'\u03be' # 0x00a5 -> GREEK SMALL LETTER XI + u'\u03bf' # 0x00a6 -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0x00a7 -> GREEK SMALL LETTER PI + u'\u03c1' # 0x00a8 -> GREEK SMALL LETTER RHO + u'\u03c3' # 0x00a9 -> GREEK SMALL LETTER SIGMA + u'\u03c2' # 0x00aa -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c4' # 0x00ab -> GREEK SMALL LETTER TAU + u'\u03c5' # 0x00ac -> GREEK SMALL LETTER UPSILON + u'\u03c6' # 0x00ad -> GREEK SMALL LETTER PHI + u'\u03c7' # 0x00ae -> GREEK SMALL LETTER CHI + u'\u03c8' # 0x00af -> GREEK SMALL LETTER PSI + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS 
LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03c9' # 0x00e0 -> GREEK SMALL LETTER OMEGA + u'\u03ac' # 0x00e1 -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\u03ad' # 0x00e2 -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0x00e3 -> GREEK SMALL LETTER ETA 
WITH TONOS + u'\u03ca' # 0x00e4 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u03af' # 0x00e5 -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03cc' # 0x00e6 -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u03cd' # 0x00e7 -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u03cb' # 0x00e8 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u03ce' # 0x00e9 -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\u0386' # 0x00ea -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\u0388' # 0x00eb -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u0389' # 0x00ec -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\u038a' # 0x00ed -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\u038c' # 0x00ee -> GREEK CAPITAL LETTER OMICRON WITH TONOS + u'\u038e' # 0x00ef -> GREEK CAPITAL LETTER UPSILON WITH TONOS + u'\u038f' # 0x00f0 -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u03aa' # 0x00f4 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\u03ab' # 0x00f5 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 
0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # 
LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W 
+ 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00f7: 0x00f6, # DIVISION SIGN + 0x0386: 0x00ea, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0388: 0x00eb, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x0389: 0x00ec, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x038a: 0x00ed, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x038c: 0x00ee, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x038e: 0x00ef, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x038f: 0x00f0, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0391: 0x0080, # GREEK CAPITAL LETTER ALPHA + 0x0392: 0x0081, # GREEK CAPITAL LETTER BETA + 0x0393: 0x0082, # GREEK CAPITAL LETTER GAMMA + 0x0394: 0x0083, # GREEK CAPITAL LETTER DELTA + 0x0395: 0x0084, # GREEK CAPITAL LETTER EPSILON + 0x0396: 0x0085, # GREEK CAPITAL LETTER ZETA + 0x0397: 0x0086, # GREEK CAPITAL LETTER ETA + 0x0398: 0x0087, # GREEK CAPITAL LETTER THETA + 0x0399: 0x0088, # GREEK CAPITAL LETTER IOTA + 0x039a: 0x0089, # GREEK CAPITAL LETTER KAPPA + 0x039b: 0x008a, # GREEK CAPITAL LETTER LAMDA + 0x039c: 0x008b, # GREEK CAPITAL LETTER MU + 0x039d: 0x008c, # GREEK CAPITAL LETTER NU + 0x039e: 0x008d, # GREEK CAPITAL LETTER XI + 0x039f: 0x008e, # GREEK CAPITAL LETTER OMICRON + 0x03a0: 0x008f, # GREEK CAPITAL LETTER PI + 0x03a1: 0x0090, # GREEK CAPITAL LETTER RHO + 0x03a3: 0x0091, # GREEK CAPITAL LETTER SIGMA + 0x03a4: 0x0092, # GREEK CAPITAL LETTER TAU + 0x03a5: 0x0093, # GREEK CAPITAL LETTER UPSILON + 0x03a6: 0x0094, # GREEK CAPITAL LETTER PHI + 0x03a7: 0x0095, # GREEK CAPITAL LETTER CHI + 0x03a8: 0x0096, # GREEK CAPITAL LETTER PSI + 0x03a9: 0x0097, # GREEK CAPITAL 
LETTER OMEGA + 0x03aa: 0x00f4, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x03ab: 0x00f5, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x03ac: 0x00e1, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x03ad: 0x00e2, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x03ae: 0x00e3, # GREEK SMALL LETTER ETA WITH TONOS + 0x03af: 0x00e5, # GREEK SMALL LETTER IOTA WITH TONOS + 0x03b1: 0x0098, # GREEK SMALL LETTER ALPHA + 0x03b2: 0x0099, # GREEK SMALL LETTER BETA + 0x03b3: 0x009a, # GREEK SMALL LETTER GAMMA + 0x03b4: 0x009b, # GREEK SMALL LETTER DELTA + 0x03b5: 0x009c, # GREEK SMALL LETTER EPSILON + 0x03b6: 0x009d, # GREEK SMALL LETTER ZETA + 0x03b7: 0x009e, # GREEK SMALL LETTER ETA + 0x03b8: 0x009f, # GREEK SMALL LETTER THETA + 0x03b9: 0x00a0, # GREEK SMALL LETTER IOTA + 0x03ba: 0x00a1, # GREEK SMALL LETTER KAPPA + 0x03bb: 0x00a2, # GREEK SMALL LETTER LAMDA + 0x03bc: 0x00a3, # GREEK SMALL LETTER MU + 0x03bd: 0x00a4, # GREEK SMALL LETTER NU + 0x03be: 0x00a5, # GREEK SMALL LETTER XI + 0x03bf: 0x00a6, # GREEK SMALL LETTER OMICRON + 0x03c0: 0x00a7, # GREEK SMALL LETTER PI + 0x03c1: 0x00a8, # GREEK SMALL LETTER RHO + 0x03c2: 0x00aa, # GREEK SMALL LETTER FINAL SIGMA + 0x03c3: 0x00a9, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00ab, # GREEK SMALL LETTER TAU + 0x03c5: 0x00ac, # GREEK SMALL LETTER UPSILON + 0x03c6: 0x00ad, # GREEK SMALL LETTER PHI + 0x03c7: 0x00ae, # GREEK SMALL LETTER CHI + 0x03c8: 0x00af, # GREEK SMALL LETTER PSI + 0x03c9: 0x00e0, # GREEK SMALL LETTER OMEGA + 0x03ca: 0x00e4, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x03cb: 0x00e8, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03cc: 0x00e6, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03cd: 0x00e7, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03ce: 0x00e9, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR 
EQUAL TO + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, 
# BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
\ No newline at end of file diff --git a/Lib/encodings/cp775.py b/Lib/encodings/cp775.py index d8cda72..ae45b47 100644 --- a/Lib/encodings/cp775.py +++ b/Lib/encodings/cp775.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP775.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP775.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x0101, # LATIN SMALL LETTER A WITH MACRON - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x0089: 0x0113, # LATIN SMALL LETTER E WITH MACRON - 0x008a: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x008b: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA - 0x008c: 0x012b, # LATIN SMALL LETTER I WITH MACRON - 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x014d, # LATIN SMALL LETTER O WITH MACRON - 0x0094: 0x00f6, # LATIN 
SMALL LETTER O WITH DIAERESIS - 0x0095: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x0096: 0x00a2, # CENT SIGN - 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x00d7, # MULTIPLICATION SIGN - 0x009f: 0x00a4, # CURRENCY SIGN - 0x00a0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON - 0x00a1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00a4: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00a5: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00a6: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00a7: 0x00a6, # BROKEN BAR - 0x00a8: 0x00a9, # COPYRIGHT SIGN - 0x00a9: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00b6: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00b7: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00b8: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x012e, # LATIN 
CAPITAL LETTER I WITH OGONEK - 0x00be: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK - 0x00c7: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00d0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00d1: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00d2: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00d3: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x00d4: 0x012f, # LATIN SMALL LETTER I WITH OGONEK - 0x00d5: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00d6: 0x0173, # LATIN SMALL LETTER U WITH OGONEK - 0x00d7: 0x016b, # LATIN SMALL LETTER U WITH MACRON - 0x00d8: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00e2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON - 0x00e3: 0x0143, # LATIN CAPITAL 
LETTER N WITH ACUTE - 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00e8: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x00e9: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA - 0x00ea: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x00eb: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA - 0x00ec: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA - 0x00ed: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON - 0x00ee: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x00ef: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x00b9, # SUPERSCRIPT ONE - 0x00fc: 0x00b3, # SUPERSCRIPT THREE - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x0101, # LATIN SMALL LETTER A WITH MACRON + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x0089: 0x0113, # LATIN SMALL LETTER E WITH MACRON + 0x008a: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x008b: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA + 0x008c: 0x012b, # LATIN SMALL LETTER I WITH MACRON + 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x008e: 
0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x014d, # LATIN SMALL LETTER O WITH MACRON + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0096: 0x00a2, # CENT SIGN + 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x00d7, # MULTIPLICATION SIGN + 0x009f: 0x00a4, # CURRENCY SIGN + 0x00a0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON + 0x00a1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00a4: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00a5: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x00a6: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00a7: 0x00a6, # BROKEN BAR + 0x00a8: 0x00a9, # COPYRIGHT SIGN + 0x00a9: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00b6: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00b7: 0x0118, # LATIN 
CAPITAL LETTER E WITH OGONEK + 0x00b8: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK + 0x00be: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK + 0x00c7: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00d0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00d1: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00d2: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00d3: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x00d4: 0x012f, # LATIN SMALL LETTER I WITH OGONEK + 0x00d5: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00d6: 0x0173, # LATIN SMALL LETTER U WITH OGONEK + 0x00d7: 0x016b, # LATIN SMALL LETTER U WITH MACRON + 0x00d8: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # 
LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00e2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON + 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00e8: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x00e9: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA + 0x00ea: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x00eb: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA + 0x00ec: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA + 0x00ed: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON + 0x00ee: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x00ef: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00f4: 0x00b6, # PILCROW SIGN + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x00b9, # SUPERSCRIPT ONE + 0x00fc: 0x00b3, # SUPERSCRIPT THREE + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> 
VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u0106' # 0x0080 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\u0101' # 0x0083 -> LATIN SMALL LETTER A WITH MACRON + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u0123' # 0x0085 -> LATIN SMALL LETTER G WITH CEDILLA + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\u0107' # 0x0087 -> LATIN SMALL LETTER C WITH ACUTE + u'\u0142' # 0x0088 -> LATIN SMALL LETTER L WITH STROKE + u'\u0113' # 0x0089 -> LATIN SMALL LETTER E WITH MACRON + u'\u0156' # 0x008a -> LATIN CAPITAL LETTER R WITH CEDILLA + u'\u0157' # 0x008b -> LATIN SMALL LETTER R WITH CEDILLA + u'\u012b' # 0x008c -> LATIN SMALL LETTER I WITH MACRON + u'\u0179' # 0x008d -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\u014d' # 0x0093 -> LATIN SMALL LETTER O WITH MACRON + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\u0122' # 0x0095 -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\xa2' # 0x0096 -> CENT SIGN + u'\u015a' # 0x0097 -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u015b' # 0x0098 -> LATIN SMALL LETTER S WITH ACUTE + u'\xd6' # 0x0099 -> LATIN 
CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE + u'\xa3' # 0x009c -> POUND SIGN + u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd7' # 0x009e -> MULTIPLICATION SIGN + u'\xa4' # 0x009f -> CURRENCY SIGN + u'\u0100' # 0x00a0 -> LATIN CAPITAL LETTER A WITH MACRON + u'\u012a' # 0x00a1 -> LATIN CAPITAL LETTER I WITH MACRON + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\u017b' # 0x00a3 -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\u017c' # 0x00a4 -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u017a' # 0x00a5 -> LATIN SMALL LETTER Z WITH ACUTE + u'\u201d' # 0x00a6 -> RIGHT DOUBLE QUOTATION MARK + u'\xa6' # 0x00a7 -> BROKEN BAR + u'\xa9' # 0x00a8 -> COPYRIGHT SIGN + u'\xae' # 0x00a9 -> REGISTERED SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\u0141' # 0x00ad -> LATIN CAPITAL LETTER L WITH STROKE + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u0104' # 0x00b5 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u010c' # 0x00b6 -> LATIN CAPITAL LETTER C WITH CARON + u'\u0118' # 0x00b7 -> LATIN CAPITAL LETTER E WITH OGONEK + u'\u0116' # 0x00b8 -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u012e' # 0x00bd -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u0160' # 0x00be -> LATIN CAPITAL LETTER S WITH CARON + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND 
LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u0172' # 0x00c6 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\u016a' # 0x00c7 -> LATIN CAPITAL LETTER U WITH MACRON + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u017d' # 0x00cf -> LATIN CAPITAL LETTER Z WITH CARON + u'\u0105' # 0x00d0 -> LATIN SMALL LETTER A WITH OGONEK + u'\u010d' # 0x00d1 -> LATIN SMALL LETTER C WITH CARON + u'\u0119' # 0x00d2 -> LATIN SMALL LETTER E WITH OGONEK + u'\u0117' # 0x00d3 -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\u012f' # 0x00d4 -> LATIN SMALL LETTER I WITH OGONEK + u'\u0161' # 0x00d5 -> LATIN SMALL LETTER S WITH CARON + u'\u0173' # 0x00d6 -> LATIN SMALL LETTER U WITH OGONEK + u'\u016b' # 0x00d7 -> LATIN SMALL LETTER U WITH MACRON + u'\u017e' # 0x00d8 -> LATIN SMALL LETTER Z WITH CARON + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN) + u'\u014c' # 0x00e2 -> LATIN CAPITAL LETTER O WITH MACRON + u'\u0143' # 0x00e3 -> LATIN 
CAPITAL LETTER N WITH ACUTE + u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE + u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u0144' # 0x00e7 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0136' # 0x00e8 -> LATIN CAPITAL LETTER K WITH CEDILLA + u'\u0137' # 0x00e9 -> LATIN SMALL LETTER K WITH CEDILLA + u'\u013b' # 0x00ea -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\u013c' # 0x00eb -> LATIN SMALL LETTER L WITH CEDILLA + u'\u0146' # 0x00ec -> LATIN SMALL LETTER N WITH CEDILLA + u'\u0112' # 0x00ed -> LATIN CAPITAL LETTER E WITH MACRON + u'\u0145' # 0x00ee -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\u2019' # 0x00ef -> RIGHT SINGLE QUOTATION MARK + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u201c' # 0x00f2 -> LEFT DOUBLE QUOTATION MARK + u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS + u'\xb6' # 0x00f4 -> PILCROW SIGN + u'\xa7' # 0x00f5 -> SECTION SIGN + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u201e' # 0x00f7 -> DOUBLE LOW-9 QUOTATION MARK + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\xb9' # 0x00fb -> SUPERSCRIPT ONE + u'\xb3' # 0x00fc -> SUPERSCRIPT THREE + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # 
DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER 
J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT 
CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a2: 0x0096, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a4: 0x009f, # CURRENCY SIGN + 0x00a6: 0x00a7, # BROKEN BAR + 0x00a7: 0x00f5, # SECTION SIGN + 0x00a9: 0x00a8, # COPYRIGHT SIGN + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00ae: 0x00a9, # REGISTERED SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b3: 0x00fc, # SUPERSCRIPT THREE + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b6: 0x00f4, # PILCROW SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00b9: 0x00fb, # SUPERSCRIPT ONE + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x009e, # MULTIPLICATION SIGN + 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION 
SIGN + 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0100: 0x00a0, # LATIN CAPITAL LETTER A WITH MACRON + 0x0101: 0x0083, # LATIN SMALL LETTER A WITH MACRON + 0x0104: 0x00b5, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0x00d0, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0x0080, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0x0087, # LATIN SMALL LETTER C WITH ACUTE + 0x010c: 0x00b6, # LATIN CAPITAL LETTER C WITH CARON + 0x010d: 0x00d1, # LATIN SMALL LETTER C WITH CARON + 0x0112: 0x00ed, # LATIN CAPITAL LETTER E WITH MACRON + 0x0113: 0x0089, # LATIN SMALL LETTER E WITH MACRON + 0x0116: 0x00b8, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x0117: 0x00d3, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x0118: 0x00b7, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0x00d2, # LATIN SMALL LETTER E WITH OGONEK + 0x0122: 0x0095, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0123: 0x0085, # LATIN SMALL LETTER G WITH CEDILLA + 0x012a: 0x00a1, # LATIN CAPITAL LETTER I WITH MACRON + 0x012b: 0x008c, # LATIN SMALL LETTER I WITH MACRON + 0x012e: 0x00bd, # LATIN CAPITAL LETTER I WITH OGONEK + 0x012f: 0x00d4, # LATIN SMALL LETTER I WITH OGONEK + 0x0136: 0x00e8, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x0137: 0x00e9, # LATIN SMALL LETTER K WITH CEDILLA + 0x013b: 0x00ea, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x013c: 0x00eb, # LATIN SMALL LETTER L WITH CEDILLA + 0x0141: 0x00ad, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0x0088, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0x00e3, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0x00e7, # LATIN SMALL LETTER N WITH ACUTE + 0x0145: 0x00ee, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x0146: 0x00ec, # LATIN SMALL LETTER N WITH CEDILLA + 0x014c: 0x00e2, # LATIN CAPITAL LETTER O WITH MACRON + 0x014d: 0x0093, # LATIN SMALL LETTER O WITH MACRON + 0x0156: 0x008a, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x0157: 0x008b, # LATIN SMALL LETTER R WITH CEDILLA + 0x015a: 0x0097, # LATIN CAPITAL 
LETTER S WITH ACUTE + 0x015b: 0x0098, # LATIN SMALL LETTER S WITH ACUTE + 0x0160: 0x00be, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x00d5, # LATIN SMALL LETTER S WITH CARON + 0x016a: 0x00c7, # LATIN CAPITAL LETTER U WITH MACRON + 0x016b: 0x00d7, # LATIN SMALL LETTER U WITH MACRON + 0x0172: 0x00c6, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0x00d6, # LATIN SMALL LETTER U WITH OGONEK + 0x0179: 0x008d, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017a: 0x00a5, # LATIN SMALL LETTER Z WITH ACUTE + 0x017b: 0x00a3, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017c: 0x00a4, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017d: 0x00cf, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x00d8, # LATIN SMALL LETTER Z WITH CARON + 0x2019: 0x00ef, # RIGHT SINGLE QUOTATION MARK + 0x201c: 0x00f2, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x00a6, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x00f7, # DOUBLE LOW-9 QUOTATION MARK + 0x2219: 0x00f9, # BULLET OPERATOR + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # 
BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
\ No newline at end of file diff --git a/Lib/encodings/cp850.py b/Lib/encodings/cp850.py index ae09839..182692b 100644 --- a/Lib/encodings/cp850.py +++ b/Lib/encodings/cp850.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP850.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP850.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 
0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x00d7, # MULTIPLICATION SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00b8: 0x00a9, # COPYRIGHT SIGN - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP 
AND LEFT - 0x00bd: 0x00a2, # CENT SIGN - 0x00be: 0x00a5, # YEN SIGN - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x00f0, # LATIN SMALL LETTER ETH - 0x00d1: 0x00d0, # LATIN CAPITAL LETTER ETH - 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00d5: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x00a6, # BROKEN BAR - 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00e4: 
0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x00fe, # LATIN SMALL LETTER THORN - 0x00e8: 0x00de, # LATIN CAPITAL LETTER THORN - 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00ee: 0x00af, # MACRON - 0x00ef: 0x00b4, # ACUTE ACCENT - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2017, # DOUBLE LOW LINE - 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x00b8, # CEDILLA - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x00b9, # SUPERSCRIPT ONE - 0x00fc: 0x00b3, # SUPERSCRIPT THREE - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x00d7, # MULTIPLICATION SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00b8: 0x00a9, # COPYRIGHT 
SIGN + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x00a2, # CENT SIGN + 0x00be: 0x00a5, # YEN SIGN + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x00a4, # CURRENCY SIGN + 0x00d0: 0x00f0, # LATIN SMALL LETTER ETH + 0x00d1: 0x00d0, # LATIN CAPITAL LETTER ETH + 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00d5: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x00a6, # BROKEN BAR + 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 
0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x00fe, # LATIN SMALL LETTER THORN + 0x00e8: 0x00de, # LATIN CAPITAL LETTER THORN + 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00ee: 0x00af, # MACRON + 0x00ef: 0x00b4, # ACUTE ACCENT + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2017, # DOUBLE LOW LINE + 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00f4: 0x00b6, # PILCROW SIGN + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x00b8, # CEDILLA + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x00a8, # DIAERESIS + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x00b9, # SUPERSCRIPT ONE + 0x00fc: 0x00b3, # SUPERSCRIPT THREE + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + 
u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\xd6' # 
0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE + u'\xa3' # 0x009c -> POUND SIGN + u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd7' # 0x009e -> MULTIPLICATION SIGN + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\xae' # 0x00a9 -> REGISTERED SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xa9' # 0x00b8 -> COPYRIGHT SIGN + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\xa2' # 0x00bd -> CENT SIGN + u'\xa5' # 0x00be -> YEN SIGN + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT 
+ u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE + u'\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa4' # 0x00cf -> CURRENCY SIGN + u'\xf0' # 0x00d0 -> LATIN SMALL LETTER ETH + u'\xd0' # 0x00d1 -> LATIN CAPITAL LETTER ETH + u'\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\u0131' # 0x00d5 -> LATIN SMALL LETTER DOTLESS I + u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\xa6' # 0x00dd -> BROKEN BAR + u'\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE + u'\xd5' # 0x00e5 -> 
LATIN CAPITAL LETTER O WITH TILDE + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\xfe' # 0x00e7 -> LATIN SMALL LETTER THORN + u'\xde' # 0x00e8 -> LATIN CAPITAL LETTER THORN + u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE + u'\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xaf' # 0x00ee -> MACRON + u'\xb4' # 0x00ef -> ACUTE ACCENT + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2017' # 0x00f2 -> DOUBLE LOW LINE + u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS + u'\xb6' # 0x00f4 -> PILCROW SIGN + u'\xa7' # 0x00f5 -> SECTION SIGN + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\xb8' # 0x00f7 -> CEDILLA + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\xa8' # 0x00f9 -> DIAERESIS + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\xb9' # 0x00fb -> SUPERSCRIPT ONE + u'\xb3' # 0x00fc -> SUPERSCRIPT THREE + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 
0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 
0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 
0x00a2: 0x00bd, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a4: 0x00cf, # CURRENCY SIGN + 0x00a5: 0x00be, # YEN SIGN + 0x00a6: 0x00dd, # BROKEN BAR + 0x00a7: 0x00f5, # SECTION SIGN + 0x00a8: 0x00f9, # DIAERESIS + 0x00a9: 0x00b8, # COPYRIGHT SIGN + 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00ae: 0x00a9, # REGISTERED SIGN + 0x00af: 0x00ee, # MACRON + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b3: 0x00fc, # SUPERSCRIPT THREE + 0x00b4: 0x00ef, # ACUTE ACCENT + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b6: 0x00f4, # PILCROW SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00b8: 0x00f7, # CEDILLA + 0x00b9: 0x00fb, # SUPERSCRIPT ONE + 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH 
DIAERESIS + 0x00d0: 0x00d1, # LATIN CAPITAL LETTER ETH + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x009e, # MULTIPLICATION SIGN + 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00de: 0x00e8, # LATIN CAPITAL LETTER THORN + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f0: 0x00d0, # LATIN SMALL LETTER ETH + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00e4, # LATIN 
SMALL LETTER O WITH TILDE + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0x00e7, # LATIN SMALL LETTER THORN + 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0131: 0x00d5, # LATIN SMALL LETTER DOTLESS I + 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x2017: 0x00f2, # DOUBLE LOW LINE + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 
0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
\ No newline at end of file diff --git a/Lib/encodings/cp852.py b/Lib/encodings/cp852.py index dad5d29..f1d60e8 100644 --- a/Lib/encodings/cp852.py +++ b/Lib/encodings/cp852.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP852.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP852.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE - 0x0086: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x008b: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE - 0x0092: 0x013a, # LATIN SMALL LETTER L WITH ACUTE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH 
CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x013d, # LATIN CAPITAL LETTER L WITH CARON - 0x0096: 0x013e, # LATIN SMALL LETTER L WITH CARON - 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x0164, # LATIN CAPITAL LETTER T WITH CARON - 0x009c: 0x0165, # LATIN SMALL LETTER T WITH CARON - 0x009d: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x009e: 0x00d7, # MULTIPLICATION SIGN - 0x009f: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00a5: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00a6: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00a7: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00a8: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00a9: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00ac: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00ad: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00b7: 0x011a, # LATIN CAPITAL LETTER E WITH CARON - 0x00b8: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX 
DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00be: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE - 0x00c7: 0x0103, # LATIN SMALL LETTER A WITH BREVE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x0111, # LATIN SMALL LETTER D WITH STROKE - 0x00d1: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE - 0x00d2: 0x010e, # LATIN CAPITAL LETTER D WITH CARON - 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00d4: 0x010f, # LATIN SMALL LETTER D WITH CARON - 0x00d5: 0x0147, # LATIN CAPITAL LETTER N WITH CARON - 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d8: 0x011b, # LATIN SMALL LETTER E WITH CARON - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA - 0x00de: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x00df: 0x2580, # UPPER HALF BLOCK - 
0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00e4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00e5: 0x0148, # LATIN SMALL LETTER N WITH CARON - 0x00e6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00e7: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00e8: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE - 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ea: 0x0155, # LATIN SMALL LETTER R WITH ACUTE - 0x00eb: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00ee: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA - 0x00ef: 0x00b4, # ACUTE ACCENT - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x02dd, # DOUBLE ACUTE ACCENT - 0x00f2: 0x02db, # OGONEK - 0x00f3: 0x02c7, # CARON - 0x00f4: 0x02d8, # BREVE - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x00b8, # CEDILLA - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x02d9, # DOT ABOVE - 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x00fc: 0x0158, # LATIN CAPITAL LETTER R WITH CARON - 0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE + 0x0086: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x0150, # LATIN CAPITAL LETTER O 
WITH DOUBLE ACUTE + 0x008b: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE + 0x0092: 0x013a, # LATIN SMALL LETTER L WITH ACUTE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x013d, # LATIN CAPITAL LETTER L WITH CARON + 0x0096: 0x013e, # LATIN SMALL LETTER L WITH CARON + 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x0164, # LATIN CAPITAL LETTER T WITH CARON + 0x009c: 0x0165, # LATIN SMALL LETTER T WITH CARON + 0x009d: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE + 0x009e: 0x00d7, # MULTIPLICATION SIGN + 0x009f: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00a5: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00a6: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00a7: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00a8: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00a9: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x00ac: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00ad: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 
0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00b7: 0x011a, # LATIN CAPITAL LETTER E WITH CARON + 0x00b8: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00be: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE + 0x00c7: 0x0103, # LATIN SMALL LETTER A WITH BREVE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x00a4, # CURRENCY SIGN + 0x00d0: 0x0111, # LATIN SMALL LETTER D WITH STROKE + 0x00d1: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE + 0x00d2: 0x010e, # LATIN CAPITAL LETTER D WITH CARON + 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00d4: 0x010f, # LATIN SMALL LETTER D WITH CARON + 0x00d5: 0x0147, # LATIN CAPITAL LETTER N 
WITH CARON + 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d8: 0x011b, # LATIN SMALL LETTER E WITH CARON + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA + 0x00de: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00e4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00e5: 0x0148, # LATIN SMALL LETTER N WITH CARON + 0x00e6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00e7: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00e8: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE + 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00ea: 0x0155, # LATIN SMALL LETTER R WITH ACUTE + 0x00eb: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00ee: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA + 0x00ef: 0x00b4, # ACUTE ACCENT + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x02dd, # DOUBLE ACUTE ACCENT + 0x00f2: 0x02db, # OGONEK + 0x00f3: 0x02c7, # CARON + 0x00f4: 0x02d8, # BREVE + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x00b8, # CEDILLA + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x00a8, # DIAERESIS + 0x00fa: 0x02d9, # DOT ABOVE + 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x00fc: 0x0158, # LATIN CAPITAL LETTER R WITH CARON + 0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 
0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u016f' # 0x0085 -> LATIN SMALL LETTER U WITH RING ABOVE + u'\u0107' # 0x0086 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\u0142' # 0x0088 -> LATIN SMALL LETTER L WITH STROKE + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u0150' # 0x008a -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + u'\u0151' # 0x008b -> LATIN SMALL LETTER O WITH DOUBLE ACUTE + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u0179' # 0x008d -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u0106' # 0x008f -> LATIN CAPITAL 
LETTER C WITH ACUTE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0139' # 0x0091 -> LATIN CAPITAL LETTER L WITH ACUTE + u'\u013a' # 0x0092 -> LATIN SMALL LETTER L WITH ACUTE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\u013d' # 0x0095 -> LATIN CAPITAL LETTER L WITH CARON + u'\u013e' # 0x0096 -> LATIN SMALL LETTER L WITH CARON + u'\u015a' # 0x0097 -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u015b' # 0x0098 -> LATIN SMALL LETTER S WITH ACUTE + u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0164' # 0x009b -> LATIN CAPITAL LETTER T WITH CARON + u'\u0165' # 0x009c -> LATIN SMALL LETTER T WITH CARON + u'\u0141' # 0x009d -> LATIN CAPITAL LETTER L WITH STROKE + u'\xd7' # 0x009e -> MULTIPLICATION SIGN + u'\u010d' # 0x009f -> LATIN SMALL LETTER C WITH CARON + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\u0104' # 0x00a4 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u0105' # 0x00a5 -> LATIN SMALL LETTER A WITH OGONEK + u'\u017d' # 0x00a6 -> LATIN CAPITAL LETTER Z WITH CARON + u'\u017e' # 0x00a7 -> LATIN SMALL LETTER Z WITH CARON + u'\u0118' # 0x00a8 -> LATIN CAPITAL LETTER E WITH OGONEK + u'\u0119' # 0x00a9 -> LATIN SMALL LETTER E WITH OGONEK + u'\xac' # 0x00aa -> NOT SIGN + u'\u017a' # 0x00ab -> LATIN SMALL LETTER Z WITH ACUTE + u'\u010c' # 0x00ac -> LATIN CAPITAL LETTER C WITH CARON + u'\u015f' # 0x00ad -> LATIN SMALL LETTER S WITH CEDILLA + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 
0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u011a' # 0x00b7 -> LATIN CAPITAL LETTER E WITH CARON + u'\u015e' # 0x00b8 -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u017b' # 0x00bd -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\u017c' # 0x00be -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u0102' # 0x00c6 -> LATIN CAPITAL LETTER A WITH BREVE + u'\u0103' # 0x00c7 -> LATIN SMALL LETTER A WITH BREVE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa4' # 0x00cf -> CURRENCY SIGN + u'\u0111' # 0x00d0 -> LATIN SMALL LETTER D WITH STROKE + u'\u0110' # 0x00d1 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u010e' # 0x00d2 -> LATIN CAPITAL LETTER D WITH CARON + u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u010f' # 0x00d4 -> LATIN SMALL LETTER D WITH CARON + u'\u0147' # 0x00d5 -> LATIN CAPITAL LETTER N WITH CARON + u'\xcd' # 0x00d6 
-> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\u011b' # 0x00d8 -> LATIN SMALL LETTER E WITH CARON + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u0162' # 0x00dd -> LATIN CAPITAL LETTER T WITH CEDILLA + u'\u016e' # 0x00de -> LATIN CAPITAL LETTER U WITH RING ABOVE + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0143' # 0x00e3 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\u0144' # 0x00e4 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0148' # 0x00e5 -> LATIN SMALL LETTER N WITH CARON + u'\u0160' # 0x00e6 -> LATIN CAPITAL LETTER S WITH CARON + u'\u0161' # 0x00e7 -> LATIN SMALL LETTER S WITH CARON + u'\u0154' # 0x00e8 -> LATIN CAPITAL LETTER R WITH ACUTE + u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\u0155' # 0x00ea -> LATIN SMALL LETTER R WITH ACUTE + u'\u0170' # 0x00eb -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + u'\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE + u'\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\u0163' # 0x00ee -> LATIN SMALL LETTER T WITH CEDILLA + u'\xb4' # 0x00ef -> ACUTE ACCENT + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\u02dd' # 0x00f1 -> DOUBLE ACUTE ACCENT + u'\u02db' # 0x00f2 -> OGONEK + u'\u02c7' # 0x00f3 -> CARON + u'\u02d8' # 0x00f4 -> BREVE + u'\xa7' # 0x00f5 -> SECTION SIGN + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\xb8' # 0x00f7 -> CEDILLA + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\xa8' # 0x00f9 -> DIAERESIS + u'\u02d9' # 0x00fa -> DOT ABOVE + u'\u0171' # 0x00fb -> LATIN SMALL LETTER U WITH DOUBLE ACUTE + u'\u0158' # 0x00fc -> LATIN CAPITAL LETTER R WITH CARON + u'\u0159' # 0x00fd -> LATIN SMALL LETTER R WITH CARON + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff 
-> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT 
EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN 
SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a4: 0x00cf, # CURRENCY SIGN + 0x00a7: 0x00f5, # SECTION SIGN + 0x00a8: 0x00f9, # DIAERESIS + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b4: 0x00ef, # ACUTE ACCENT + 0x00b8: 0x00f7, # CEDILLA + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x009e, # MULTIPLICATION SIGN + 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00dc: 0x009a, # LATIN CAPITAL 
LETTER U WITH DIAERESIS + 0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE + 0x0102: 0x00c6, # LATIN CAPITAL LETTER A WITH BREVE + 0x0103: 0x00c7, # LATIN SMALL LETTER A WITH BREVE + 0x0104: 0x00a4, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0x00a5, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0x008f, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0x0086, # LATIN SMALL LETTER C WITH ACUTE + 0x010c: 0x00ac, # LATIN CAPITAL LETTER C WITH CARON + 0x010d: 0x009f, # LATIN SMALL LETTER C WITH CARON + 0x010e: 0x00d2, # LATIN CAPITAL LETTER D WITH CARON + 0x010f: 0x00d4, # LATIN SMALL LETTER D WITH CARON + 0x0110: 0x00d1, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0x00d0, # LATIN SMALL LETTER D WITH STROKE + 0x0118: 0x00a8, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0x00a9, # LATIN SMALL LETTER E WITH OGONEK + 0x011a: 0x00b7, # LATIN CAPITAL LETTER E WITH CARON + 0x011b: 0x00d8, # LATIN SMALL LETTER E WITH CARON + 0x0139: 0x0091, # LATIN CAPITAL LETTER L WITH ACUTE + 0x013a: 0x0092, # LATIN SMALL LETTER L WITH ACUTE + 0x013d: 0x0095, # LATIN CAPITAL LETTER L WITH CARON + 0x013e: 0x0096, # LATIN SMALL LETTER L WITH CARON + 0x0141: 
0x009d, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0x0088, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0x00e3, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0x00e4, # LATIN SMALL LETTER N WITH ACUTE + 0x0147: 0x00d5, # LATIN CAPITAL LETTER N WITH CARON + 0x0148: 0x00e5, # LATIN SMALL LETTER N WITH CARON + 0x0150: 0x008a, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x0151: 0x008b, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x0154: 0x00e8, # LATIN CAPITAL LETTER R WITH ACUTE + 0x0155: 0x00ea, # LATIN SMALL LETTER R WITH ACUTE + 0x0158: 0x00fc, # LATIN CAPITAL LETTER R WITH CARON + 0x0159: 0x00fd, # LATIN SMALL LETTER R WITH CARON + 0x015a: 0x0097, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015b: 0x0098, # LATIN SMALL LETTER S WITH ACUTE + 0x015e: 0x00b8, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015f: 0x00ad, # LATIN SMALL LETTER S WITH CEDILLA + 0x0160: 0x00e6, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x00e7, # LATIN SMALL LETTER S WITH CARON + 0x0162: 0x00dd, # LATIN CAPITAL LETTER T WITH CEDILLA + 0x0163: 0x00ee, # LATIN SMALL LETTER T WITH CEDILLA + 0x0164: 0x009b, # LATIN CAPITAL LETTER T WITH CARON + 0x0165: 0x009c, # LATIN SMALL LETTER T WITH CARON + 0x016e: 0x00de, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x016f: 0x0085, # LATIN SMALL LETTER U WITH RING ABOVE + 0x0170: 0x00eb, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x0171: 0x00fb, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x0179: 0x008d, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017a: 0x00ab, # LATIN SMALL LETTER Z WITH ACUTE + 0x017b: 0x00bd, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017c: 0x00be, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017d: 0x00a6, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x00a7, # LATIN SMALL LETTER Z WITH CARON + 0x02c7: 0x00f3, # CARON + 0x02d8: 0x00f4, # BREVE + 0x02d9: 0x00fa, # DOT ABOVE + 0x02db: 0x00f2, # OGONEK + 0x02dd: 0x00f1, # DOUBLE ACUTE ACCENT + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 
0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
\ No newline at end of file diff --git a/Lib/encodings/cp855.py b/Lib/encodings/cp855.py index d93c5e8..c9baace 100644 --- a/Lib/encodings/cp855.py +++ b/Lib/encodings/cp855.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP855.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP855.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE - 0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE - 0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE - 0x0083: 0x0403, # CYRILLIC CAPITAL LETTER GJE - 0x0084: 0x0451, # CYRILLIC SMALL LETTER IO - 0x0085: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x0086: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0087: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x0088: 0x0455, # CYRILLIC SMALL LETTER DZE - 0x0089: 0x0405, # CYRILLIC CAPITAL LETTER DZE - 0x008a: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x008b: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x008c: 0x0457, # CYRILLIC SMALL LETTER YI - 0x008d: 0x0407, # CYRILLIC CAPITAL LETTER YI - 0x008e: 0x0458, # CYRILLIC SMALL LETTER JE - 0x008f: 0x0408, # CYRILLIC CAPITAL LETTER JE - 0x0090: 0x0459, # CYRILLIC SMALL LETTER LJE - 0x0091: 0x0409, # CYRILLIC CAPITAL LETTER LJE - 0x0092: 0x045a, # CYRILLIC SMALL LETTER NJE - 0x0093: 0x040a, # CYRILLIC CAPITAL LETTER NJE - 0x0094: 0x045b, # CYRILLIC SMALL LETTER TSHE - 0x0095: 0x040b, # CYRILLIC CAPITAL LETTER TSHE - 
0x0096: 0x045c, # CYRILLIC SMALL LETTER KJE - 0x0097: 0x040c, # CYRILLIC CAPITAL LETTER KJE - 0x0098: 0x045e, # CYRILLIC SMALL LETTER SHORT U - 0x0099: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U - 0x009a: 0x045f, # CYRILLIC SMALL LETTER DZHE - 0x009b: 0x040f, # CYRILLIC CAPITAL LETTER DZHE - 0x009c: 0x044e, # CYRILLIC SMALL LETTER YU - 0x009d: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x009e: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x009f: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A - 0x00a1: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x00a2: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00a3: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x00a4: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00a5: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x00a6: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00a7: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x00a8: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00a9: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x00aa: 0x0444, # CYRILLIC SMALL LETTER EF - 0x00ab: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x00ac: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00ad: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00b6: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x00b7: 0x0438, # CYRILLIC SMALL LETTER I - 0x00b8: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00be: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT 
DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00c7: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00d1: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x00d2: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00d3: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x00d4: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00d5: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x00d6: 0x043e, # CYRILLIC SMALL LETTER O - 0x00d7: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x00d8: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x00de: 0x044f, # CYRILLIC SMALL LETTER YA - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00e1: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00e2: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x00e3: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00e4: 0x0421, # CYRILLIC CAPITAL LETTER ES - 0x00e5: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00e6: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x00e7: 0x0443, # CYRILLIC SMALL LETTER U - 0x00e8: 0x0423, # CYRILLIC CAPITAL 
LETTER U - 0x00e9: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00ea: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x00eb: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00ec: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x00ed: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00ee: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x00ef: 0x2116, # NUMERO SIGN - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00f2: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x00f3: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00f4: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x00f5: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00f6: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x00f7: 0x044d, # CYRILLIC SMALL LETTER E - 0x00f8: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00fa: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x00fb: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00fc: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x00fd: 0x00a7, # SECTION SIGN - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE + 0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE + 0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE + 0x0083: 0x0403, # CYRILLIC CAPITAL LETTER GJE + 0x0084: 0x0451, # CYRILLIC SMALL LETTER IO + 0x0085: 0x0401, # CYRILLIC CAPITAL LETTER IO + 0x0086: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0087: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0088: 0x0455, # CYRILLIC SMALL LETTER DZE + 0x0089: 0x0405, # CYRILLIC CAPITAL LETTER DZE + 0x008a: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x008b: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x008c: 0x0457, # CYRILLIC SMALL LETTER YI + 0x008d: 0x0407, # CYRILLIC CAPITAL LETTER YI + 0x008e: 0x0458, # CYRILLIC SMALL LETTER JE + 0x008f: 0x0408, # CYRILLIC CAPITAL LETTER JE + 0x0090: 0x0459, # CYRILLIC SMALL LETTER LJE + 0x0091: 0x0409, # CYRILLIC CAPITAL LETTER LJE + 0x0092: 0x045a, # CYRILLIC SMALL LETTER NJE + 0x0093: 0x040a, # 
CYRILLIC CAPITAL LETTER NJE + 0x0094: 0x045b, # CYRILLIC SMALL LETTER TSHE + 0x0095: 0x040b, # CYRILLIC CAPITAL LETTER TSHE + 0x0096: 0x045c, # CYRILLIC SMALL LETTER KJE + 0x0097: 0x040c, # CYRILLIC CAPITAL LETTER KJE + 0x0098: 0x045e, # CYRILLIC SMALL LETTER SHORT U + 0x0099: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U + 0x009a: 0x045f, # CYRILLIC SMALL LETTER DZHE + 0x009b: 0x040f, # CYRILLIC CAPITAL LETTER DZHE + 0x009c: 0x044e, # CYRILLIC SMALL LETTER YU + 0x009d: 0x042e, # CYRILLIC CAPITAL LETTER YU + 0x009e: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN + 0x009f: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A + 0x00a1: 0x0410, # CYRILLIC CAPITAL LETTER A + 0x00a2: 0x0431, # CYRILLIC SMALL LETTER BE + 0x00a3: 0x0411, # CYRILLIC CAPITAL LETTER BE + 0x00a4: 0x0446, # CYRILLIC SMALL LETTER TSE + 0x00a5: 0x0426, # CYRILLIC CAPITAL LETTER TSE + 0x00a6: 0x0434, # CYRILLIC SMALL LETTER DE + 0x00a7: 0x0414, # CYRILLIC CAPITAL LETTER DE + 0x00a8: 0x0435, # CYRILLIC SMALL LETTER IE + 0x00a9: 0x0415, # CYRILLIC CAPITAL LETTER IE + 0x00aa: 0x0444, # CYRILLIC SMALL LETTER EF + 0x00ab: 0x0424, # CYRILLIC CAPITAL LETTER EF + 0x00ac: 0x0433, # CYRILLIC SMALL LETTER GHE + 0x00ad: 0x0413, # CYRILLIC CAPITAL LETTER GHE + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x0445, # CYRILLIC SMALL LETTER HA + 0x00b6: 0x0425, # CYRILLIC CAPITAL LETTER HA + 0x00b7: 0x0438, # CYRILLIC SMALL LETTER I + 0x00b8: 0x0418, # CYRILLIC CAPITAL LETTER I + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x0439, 
# CYRILLIC SMALL LETTER SHORT I + 0x00be: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x043a, # CYRILLIC SMALL LETTER KA + 0x00c7: 0x041a, # CYRILLIC CAPITAL LETTER KA + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x00a4, # CURRENCY SIGN + 0x00d0: 0x043b, # CYRILLIC SMALL LETTER EL + 0x00d1: 0x041b, # CYRILLIC CAPITAL LETTER EL + 0x00d2: 0x043c, # CYRILLIC SMALL LETTER EM + 0x00d3: 0x041c, # CYRILLIC CAPITAL LETTER EM + 0x00d4: 0x043d, # CYRILLIC SMALL LETTER EN + 0x00d5: 0x041d, # CYRILLIC CAPITAL LETTER EN + 0x00d6: 0x043e, # CYRILLIC SMALL LETTER O + 0x00d7: 0x041e, # CYRILLIC CAPITAL LETTER O + 0x00d8: 0x043f, # CYRILLIC SMALL LETTER PE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x041f, # CYRILLIC CAPITAL LETTER PE + 0x00de: 0x044f, # CYRILLIC SMALL LETTER YA + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x042f, # CYRILLIC CAPITAL LETTER YA + 0x00e1: 0x0440, # CYRILLIC SMALL LETTER ER + 0x00e2: 0x0420, # CYRILLIC CAPITAL LETTER ER + 0x00e3: 0x0441, # CYRILLIC SMALL LETTER ES + 0x00e4: 0x0421, # CYRILLIC CAPITAL LETTER ES + 0x00e5: 0x0442, # CYRILLIC SMALL LETTER TE + 
0x00e6: 0x0422, # CYRILLIC CAPITAL LETTER TE + 0x00e7: 0x0443, # CYRILLIC SMALL LETTER U + 0x00e8: 0x0423, # CYRILLIC CAPITAL LETTER U + 0x00e9: 0x0436, # CYRILLIC SMALL LETTER ZHE + 0x00ea: 0x0416, # CYRILLIC CAPITAL LETTER ZHE + 0x00eb: 0x0432, # CYRILLIC SMALL LETTER VE + 0x00ec: 0x0412, # CYRILLIC CAPITAL LETTER VE + 0x00ed: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN + 0x00ee: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x00ef: 0x2116, # NUMERO SIGN + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x044b, # CYRILLIC SMALL LETTER YERU + 0x00f2: 0x042b, # CYRILLIC CAPITAL LETTER YERU + 0x00f3: 0x0437, # CYRILLIC SMALL LETTER ZE + 0x00f4: 0x0417, # CYRILLIC CAPITAL LETTER ZE + 0x00f5: 0x0448, # CYRILLIC SMALL LETTER SHA + 0x00f6: 0x0428, # CYRILLIC CAPITAL LETTER SHA + 0x00f7: 0x044d, # CYRILLIC SMALL LETTER E + 0x00f8: 0x042d, # CYRILLIC CAPITAL LETTER E + 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA + 0x00fa: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA + 0x00fb: 0x0447, # CYRILLIC SMALL LETTER CHE + 0x00fc: 0x0427, # CYRILLIC CAPITAL LETTER CHE + 0x00fd: 0x00a7, # SECTION SIGN + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + 
u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u0452' # 0x0080 -> CYRILLIC SMALL LETTER DJE + u'\u0402' # 0x0081 -> CYRILLIC CAPITAL LETTER DJE + u'\u0453' # 0x0082 -> CYRILLIC SMALL LETTER GJE + u'\u0403' # 0x0083 -> CYRILLIC CAPITAL LETTER GJE + u'\u0451' # 0x0084 -> CYRILLIC SMALL LETTER IO + u'\u0401' # 0x0085 -> CYRILLIC CAPITAL LETTER IO + u'\u0454' # 0x0086 -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\u0404' # 0x0087 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\u0455' # 0x0088 -> CYRILLIC SMALL LETTER DZE + u'\u0405' # 0x0089 -> CYRILLIC CAPITAL LETTER DZE + u'\u0456' # 0x008a -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0406' # 0x008b -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0457' # 0x008c -> CYRILLIC SMALL LETTER YI + u'\u0407' # 0x008d -> CYRILLIC CAPITAL LETTER YI + u'\u0458' # 0x008e -> CYRILLIC SMALL LETTER JE + u'\u0408' # 0x008f -> CYRILLIC CAPITAL LETTER JE + u'\u0459' # 0x0090 -> CYRILLIC SMALL LETTER LJE + u'\u0409' # 0x0091 -> CYRILLIC CAPITAL LETTER LJE + u'\u045a' # 0x0092 -> CYRILLIC SMALL LETTER NJE + u'\u040a' # 0x0093 -> CYRILLIC CAPITAL LETTER NJE + u'\u045b' # 0x0094 -> CYRILLIC SMALL LETTER TSHE + u'\u040b' # 0x0095 -> CYRILLIC CAPITAL LETTER TSHE + u'\u045c' # 0x0096 -> CYRILLIC SMALL LETTER KJE + u'\u040c' # 0x0097 -> CYRILLIC CAPITAL LETTER KJE + u'\u045e' # 0x0098 -> CYRILLIC SMALL LETTER SHORT U + u'\u040e' # 0x0099 -> CYRILLIC CAPITAL LETTER SHORT U + u'\u045f' # 0x009a -> CYRILLIC 
SMALL LETTER DZHE + u'\u040f' # 0x009b -> CYRILLIC CAPITAL LETTER DZHE + u'\u044e' # 0x009c -> CYRILLIC SMALL LETTER YU + u'\u042e' # 0x009d -> CYRILLIC CAPITAL LETTER YU + u'\u044a' # 0x009e -> CYRILLIC SMALL LETTER HARD SIGN + u'\u042a' # 0x009f -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\u0430' # 0x00a0 -> CYRILLIC SMALL LETTER A + u'\u0410' # 0x00a1 -> CYRILLIC CAPITAL LETTER A + u'\u0431' # 0x00a2 -> CYRILLIC SMALL LETTER BE + u'\u0411' # 0x00a3 -> CYRILLIC CAPITAL LETTER BE + u'\u0446' # 0x00a4 -> CYRILLIC SMALL LETTER TSE + u'\u0426' # 0x00a5 -> CYRILLIC CAPITAL LETTER TSE + u'\u0434' # 0x00a6 -> CYRILLIC SMALL LETTER DE + u'\u0414' # 0x00a7 -> CYRILLIC CAPITAL LETTER DE + u'\u0435' # 0x00a8 -> CYRILLIC SMALL LETTER IE + u'\u0415' # 0x00a9 -> CYRILLIC CAPITAL LETTER IE + u'\u0444' # 0x00aa -> CYRILLIC SMALL LETTER EF + u'\u0424' # 0x00ab -> CYRILLIC CAPITAL LETTER EF + u'\u0433' # 0x00ac -> CYRILLIC SMALL LETTER GHE + u'\u0413' # 0x00ad -> CYRILLIC CAPITAL LETTER GHE + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u0445' # 0x00b5 -> CYRILLIC SMALL LETTER HA + u'\u0425' # 0x00b6 -> CYRILLIC CAPITAL LETTER HA + u'\u0438' # 0x00b7 -> CYRILLIC SMALL LETTER I + u'\u0418' # 0x00b8 -> CYRILLIC CAPITAL LETTER I + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u0439' # 0x00bd -> CYRILLIC SMALL LETTER SHORT I + u'\u0419' # 0x00be -> CYRILLIC CAPITAL LETTER SHORT I + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' 
# 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u043a' # 0x00c6 -> CYRILLIC SMALL LETTER KA + u'\u041a' # 0x00c7 -> CYRILLIC CAPITAL LETTER KA + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa4' # 0x00cf -> CURRENCY SIGN + u'\u043b' # 0x00d0 -> CYRILLIC SMALL LETTER EL + u'\u041b' # 0x00d1 -> CYRILLIC CAPITAL LETTER EL + u'\u043c' # 0x00d2 -> CYRILLIC SMALL LETTER EM + u'\u041c' # 0x00d3 -> CYRILLIC CAPITAL LETTER EM + u'\u043d' # 0x00d4 -> CYRILLIC SMALL LETTER EN + u'\u041d' # 0x00d5 -> CYRILLIC CAPITAL LETTER EN + u'\u043e' # 0x00d6 -> CYRILLIC SMALL LETTER O + u'\u041e' # 0x00d7 -> CYRILLIC CAPITAL LETTER O + u'\u043f' # 0x00d8 -> CYRILLIC SMALL LETTER PE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u041f' # 0x00dd -> CYRILLIC CAPITAL LETTER PE + u'\u044f' # 0x00de -> CYRILLIC SMALL LETTER YA + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u042f' # 0x00e0 -> CYRILLIC CAPITAL LETTER YA + u'\u0440' # 0x00e1 -> CYRILLIC SMALL LETTER ER + u'\u0420' # 0x00e2 -> CYRILLIC CAPITAL LETTER ER + u'\u0441' # 0x00e3 -> CYRILLIC SMALL LETTER ES + u'\u0421' # 0x00e4 -> CYRILLIC CAPITAL LETTER ES + u'\u0442' # 0x00e5 -> CYRILLIC SMALL LETTER TE + u'\u0422' # 0x00e6 -> CYRILLIC CAPITAL LETTER TE + u'\u0443' 
# 0x00e7 -> CYRILLIC SMALL LETTER U + u'\u0423' # 0x00e8 -> CYRILLIC CAPITAL LETTER U + u'\u0436' # 0x00e9 -> CYRILLIC SMALL LETTER ZHE + u'\u0416' # 0x00ea -> CYRILLIC CAPITAL LETTER ZHE + u'\u0432' # 0x00eb -> CYRILLIC SMALL LETTER VE + u'\u0412' # 0x00ec -> CYRILLIC CAPITAL LETTER VE + u'\u044c' # 0x00ed -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u042c' # 0x00ee -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u2116' # 0x00ef -> NUMERO SIGN + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\u044b' # 0x00f1 -> CYRILLIC SMALL LETTER YERU + u'\u042b' # 0x00f2 -> CYRILLIC CAPITAL LETTER YERU + u'\u0437' # 0x00f3 -> CYRILLIC SMALL LETTER ZE + u'\u0417' # 0x00f4 -> CYRILLIC CAPITAL LETTER ZE + u'\u0448' # 0x00f5 -> CYRILLIC SMALL LETTER SHA + u'\u0428' # 0x00f6 -> CYRILLIC CAPITAL LETTER SHA + u'\u044d' # 0x00f7 -> CYRILLIC SMALL LETTER E + u'\u042d' # 0x00f8 -> CYRILLIC CAPITAL LETTER E + u'\u0449' # 0x00f9 -> CYRILLIC SMALL LETTER SHCHA + u'\u0429' # 0x00fa -> CYRILLIC CAPITAL LETTER SHCHA + u'\u0447' # 0x00fb -> CYRILLIC SMALL LETTER CHE + u'\u0427' # 0x00fc -> CYRILLIC CAPITAL LETTER CHE + u'\xa7' # 0x00fd -> SECTION SIGN + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL 
FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 
0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # 
DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a4: 0x00cf, # CURRENCY SIGN + 0x00a7: 0x00fd, # SECTION SIGN + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x0401: 0x0085, # CYRILLIC CAPITAL LETTER IO + 0x0402: 0x0081, # CYRILLIC CAPITAL LETTER DJE + 0x0403: 0x0083, # CYRILLIC CAPITAL LETTER GJE + 0x0404: 0x0087, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0405: 0x0089, # CYRILLIC CAPITAL LETTER DZE + 0x0406: 0x008b, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0407: 0x008d, # CYRILLIC CAPITAL LETTER YI + 0x0408: 0x008f, # CYRILLIC CAPITAL LETTER JE + 0x0409: 0x0091, # CYRILLIC CAPITAL LETTER LJE + 0x040a: 0x0093, # CYRILLIC CAPITAL LETTER NJE + 0x040b: 0x0095, # CYRILLIC CAPITAL LETTER TSHE + 0x040c: 0x0097, # CYRILLIC CAPITAL LETTER KJE + 0x040e: 0x0099, # CYRILLIC CAPITAL LETTER SHORT U + 0x040f: 0x009b, # CYRILLIC CAPITAL LETTER DZHE + 0x0410: 0x00a1, # CYRILLIC CAPITAL LETTER A + 0x0411: 0x00a3, # CYRILLIC CAPITAL LETTER BE + 0x0412: 0x00ec, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0x00ad, # CYRILLIC CAPITAL LETTER GHE + 0x0414: 0x00a7, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0x00a9, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0x00ea, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0x00f4, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0x00b8, # CYRILLIC CAPITAL LETTER I + 0x0419: 0x00be, # CYRILLIC CAPITAL LETTER SHORT I + 0x041a: 0x00c7, # CYRILLIC CAPITAL LETTER KA + 0x041b: 0x00d1, # CYRILLIC CAPITAL LETTER EL + 0x041c: 0x00d3, # CYRILLIC CAPITAL LETTER EM + 0x041d: 0x00d5, # CYRILLIC CAPITAL LETTER EN + 0x041e: 0x00d7, # CYRILLIC CAPITAL LETTER O + 0x041f: 0x00dd, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0x00e2, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0x00e4, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0x00e6, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0x00e8, # CYRILLIC CAPITAL LETTER U + 0x0424: 0x00ab, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0x00b6, # CYRILLIC CAPITAL 
LETTER HA + 0x0426: 0x00a5, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0x00fc, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0x00f6, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0x00fa, # CYRILLIC CAPITAL LETTER SHCHA + 0x042a: 0x009f, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042b: 0x00f2, # CYRILLIC CAPITAL LETTER YERU + 0x042c: 0x00ee, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042d: 0x00f8, # CYRILLIC CAPITAL LETTER E + 0x042e: 0x009d, # CYRILLIC CAPITAL LETTER YU + 0x042f: 0x00e0, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0x00a0, # CYRILLIC SMALL LETTER A + 0x0431: 0x00a2, # CYRILLIC SMALL LETTER BE + 0x0432: 0x00eb, # CYRILLIC SMALL LETTER VE + 0x0433: 0x00ac, # CYRILLIC SMALL LETTER GHE + 0x0434: 0x00a6, # CYRILLIC SMALL LETTER DE + 0x0435: 0x00a8, # CYRILLIC SMALL LETTER IE + 0x0436: 0x00e9, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0x00f3, # CYRILLIC SMALL LETTER ZE + 0x0438: 0x00b7, # CYRILLIC SMALL LETTER I + 0x0439: 0x00bd, # CYRILLIC SMALL LETTER SHORT I + 0x043a: 0x00c6, # CYRILLIC SMALL LETTER KA + 0x043b: 0x00d0, # CYRILLIC SMALL LETTER EL + 0x043c: 0x00d2, # CYRILLIC SMALL LETTER EM + 0x043d: 0x00d4, # CYRILLIC SMALL LETTER EN + 0x043e: 0x00d6, # CYRILLIC SMALL LETTER O + 0x043f: 0x00d8, # CYRILLIC SMALL LETTER PE + 0x0440: 0x00e1, # CYRILLIC SMALL LETTER ER + 0x0441: 0x00e3, # CYRILLIC SMALL LETTER ES + 0x0442: 0x00e5, # CYRILLIC SMALL LETTER TE + 0x0443: 0x00e7, # CYRILLIC SMALL LETTER U + 0x0444: 0x00aa, # CYRILLIC SMALL LETTER EF + 0x0445: 0x00b5, # CYRILLIC SMALL LETTER HA + 0x0446: 0x00a4, # CYRILLIC SMALL LETTER TSE + 0x0447: 0x00fb, # CYRILLIC SMALL LETTER CHE + 0x0448: 0x00f5, # CYRILLIC SMALL LETTER SHA + 0x0449: 0x00f9, # CYRILLIC SMALL LETTER SHCHA + 0x044a: 0x009e, # CYRILLIC SMALL LETTER HARD SIGN + 0x044b: 0x00f1, # CYRILLIC SMALL LETTER YERU + 0x044c: 0x00ed, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044d: 0x00f7, # CYRILLIC SMALL LETTER E + 0x044e: 0x009c, # CYRILLIC SMALL LETTER YU + 0x044f: 0x00de, # CYRILLIC SMALL LETTER YA + 0x0451: 0x0084, # CYRILLIC SMALL 
LETTER IO + 0x0452: 0x0080, # CYRILLIC SMALL LETTER DJE + 0x0453: 0x0082, # CYRILLIC SMALL LETTER GJE + 0x0454: 0x0086, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0455: 0x0088, # CYRILLIC SMALL LETTER DZE + 0x0456: 0x008a, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0457: 0x008c, # CYRILLIC SMALL LETTER YI + 0x0458: 0x008e, # CYRILLIC SMALL LETTER JE + 0x0459: 0x0090, # CYRILLIC SMALL LETTER LJE + 0x045a: 0x0092, # CYRILLIC SMALL LETTER NJE + 0x045b: 0x0094, # CYRILLIC SMALL LETTER TSHE + 0x045c: 0x0096, # CYRILLIC SMALL LETTER KJE + 0x045e: 0x0098, # CYRILLIC SMALL LETTER SHORT U + 0x045f: 0x009a, # CYRILLIC SMALL LETTER DZHE + 0x2116: 0x00ef, # NUMERO SIGN + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 
0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
\ No newline at end of file diff --git a/Lib/encodings/cp856.py b/Lib/encodings/cp856.py index 1bf67f0..358cfe9 100644 --- a/Lib/encodings/cp856.py +++ b/Lib/encodings/cp856.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP856.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MISC/CP856.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,613 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x05d0, # HEBREW LETTER ALEF - 0x0081: 0x05d1, # HEBREW LETTER BET - 0x0082: 0x05d2, # HEBREW LETTER GIMEL - 0x0083: 0x05d3, # HEBREW LETTER DALET - 0x0084: 0x05d4, # HEBREW LETTER HE - 0x0085: 0x05d5, # HEBREW LETTER VAV - 0x0086: 0x05d6, # HEBREW LETTER ZAYIN - 0x0087: 0x05d7, # HEBREW LETTER HET - 0x0088: 0x05d8, # HEBREW LETTER TET - 0x0089: 0x05d9, # HEBREW LETTER YOD - 0x008a: 0x05da, # HEBREW LETTER FINAL KAF - 0x008b: 0x05db, # HEBREW LETTER KAF - 0x008c: 0x05dc, # HEBREW LETTER LAMED - 0x008d: 0x05dd, # HEBREW LETTER FINAL MEM - 0x008e: 0x05de, # HEBREW LETTER MEM - 0x008f: 0x05df, # HEBREW LETTER FINAL NUN - 0x0090: 0x05e0, # HEBREW LETTER NUN - 0x0091: 0x05e1, # HEBREW LETTER SAMEKH - 0x0092: 0x05e2, # HEBREW LETTER AYIN - 0x0093: 0x05e3, # HEBREW LETTER FINAL PE - 0x0094: 0x05e4, # HEBREW LETTER PE - 0x0095: 0x05e5, # HEBREW LETTER FINAL TSADI - 0x0096: 0x05e6, # HEBREW LETTER TSADI - 0x0097: 0x05e7, # HEBREW LETTER QOF - 0x0098: 0x05e8, # HEBREW LETTER RESH - 0x0099: 0x05e9, # HEBREW LETTER SHIN - 0x009a: 0x05ea, # HEBREW LETTER TAV - 0x009b: None, # UNDEFINED - 
0x009c: 0x00a3, # POUND SIGN - 0x009d: None, # UNDEFINED - 0x009e: 0x00d7, # MULTIPLICATION SIGN - 0x009f: None, # UNDEFINED - 0x00a0: None, # UNDEFINED - 0x00a1: None, # UNDEFINED - 0x00a2: None, # UNDEFINED - 0x00a3: None, # UNDEFINED - 0x00a4: None, # UNDEFINED - 0x00a5: None, # UNDEFINED - 0x00a6: None, # UNDEFINED - 0x00a7: None, # UNDEFINED - 0x00a8: None, # UNDEFINED - 0x00a9: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: None, # UNDEFINED - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: None, # UNDEFINED - 0x00b6: None, # UNDEFINED - 0x00b7: None, # UNDEFINED - 0x00b8: 0x00a9, # COPYRIGHT SIGN - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x00a2, # CENT SIGN - 0x00be: 0x00a5, # YEN SIGN - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: None, # UNDEFINED - 0x00c7: None, # UNDEFINED - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE 
VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: None, # UNDEFINED - 0x00d1: None, # UNDEFINED - 0x00d2: None, # UNDEFINED - 0x00d3: None, # UNDEFINEDS - 0x00d4: None, # UNDEFINED - 0x00d5: None, # UNDEFINED - 0x00d6: None, # UNDEFINEDE - 0x00d7: None, # UNDEFINED - 0x00d8: None, # UNDEFINED - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x00a6, # BROKEN BAR - 0x00de: None, # UNDEFINED - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: None, # UNDEFINED - 0x00e1: None, # UNDEFINED - 0x00e2: None, # UNDEFINED - 0x00e3: None, # UNDEFINED - 0x00e4: None, # UNDEFINED - 0x00e5: None, # UNDEFINED - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: None, # UNDEFINED - 0x00e8: None, # UNDEFINED - 0x00e9: None, # UNDEFINED - 0x00ea: None, # UNDEFINED - 0x00eb: None, # UNDEFINED - 0x00ec: None, # UNDEFINED - 0x00ed: None, # UNDEFINED - 0x00ee: 0x00af, # MACRON - 0x00ef: 0x00b4, # ACUTE ACCENT - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2017, # DOUBLE LOW LINE - 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x00b8, # CEDILLA - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x00b9, # SUPERSCRIPT ONE - 0x00fc: 0x00b3, # SUPERSCRIPT THREE - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x05d0, # HEBREW LETTER ALEF + 0x0081: 0x05d1, # HEBREW LETTER BET + 0x0082: 0x05d2, # HEBREW LETTER GIMEL + 0x0083: 0x05d3, # HEBREW LETTER DALET + 0x0084: 0x05d4, # HEBREW LETTER HE + 0x0085: 0x05d5, # HEBREW LETTER VAV + 0x0086: 0x05d6, # HEBREW LETTER ZAYIN + 0x0087: 0x05d7, # HEBREW 
LETTER HET + 0x0088: 0x05d8, # HEBREW LETTER TET + 0x0089: 0x05d9, # HEBREW LETTER YOD + 0x008a: 0x05da, # HEBREW LETTER FINAL KAF + 0x008b: 0x05db, # HEBREW LETTER KAF + 0x008c: 0x05dc, # HEBREW LETTER LAMED + 0x008d: 0x05dd, # HEBREW LETTER FINAL MEM + 0x008e: 0x05de, # HEBREW LETTER MEM + 0x008f: 0x05df, # HEBREW LETTER FINAL NUN + 0x0090: 0x05e0, # HEBREW LETTER NUN + 0x0091: 0x05e1, # HEBREW LETTER SAMEKH + 0x0092: 0x05e2, # HEBREW LETTER AYIN + 0x0093: 0x05e3, # HEBREW LETTER FINAL PE + 0x0094: 0x05e4, # HEBREW LETTER PE + 0x0095: 0x05e5, # HEBREW LETTER FINAL TSADI + 0x0096: 0x05e6, # HEBREW LETTER TSADI + 0x0097: 0x05e7, # HEBREW LETTER QOF + 0x0098: 0x05e8, # HEBREW LETTER RESH + 0x0099: 0x05e9, # HEBREW LETTER SHIN + 0x009a: 0x05ea, # HEBREW LETTER TAV + 0x009b: None, # UNDEFINED + 0x009c: 0x00a3, # POUND SIGN + 0x009d: None, # UNDEFINED + 0x009e: 0x00d7, # MULTIPLICATION SIGN + 0x009f: None, # UNDEFINED + 0x00a0: None, # UNDEFINED + 0x00a1: None, # UNDEFINED + 0x00a2: None, # UNDEFINED + 0x00a3: None, # UNDEFINED + 0x00a4: None, # UNDEFINED + 0x00a5: None, # UNDEFINED + 0x00a6: None, # UNDEFINED + 0x00a7: None, # UNDEFINED + 0x00a8: None, # UNDEFINED + 0x00a9: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: None, # UNDEFINED + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: None, # UNDEFINED + 0x00b6: None, # UNDEFINED + 0x00b7: None, # UNDEFINED + 0x00b8: 0x00a9, # COPYRIGHT SIGN + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX 
DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x00a2, # CENT SIGN + 0x00be: 0x00a5, # YEN SIGN + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: None, # UNDEFINED + 0x00c7: None, # UNDEFINED + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x00a4, # CURRENCY SIGN + 0x00d0: None, # UNDEFINED + 0x00d1: None, # UNDEFINED + 0x00d2: None, # UNDEFINED + 0x00d3: None, # UNDEFINEDS + 0x00d4: None, # UNDEFINED + 0x00d5: None, # UNDEFINED + 0x00d6: None, # UNDEFINEDE + 0x00d7: None, # UNDEFINED + 0x00d8: None, # UNDEFINED + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x00a6, # BROKEN BAR + 0x00de: None, # UNDEFINED + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: None, # UNDEFINED + 0x00e1: None, # UNDEFINED + 0x00e2: None, # UNDEFINED + 0x00e3: None, # UNDEFINED + 0x00e4: None, # UNDEFINED + 0x00e5: None, # UNDEFINED + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: None, # UNDEFINED + 0x00e8: None, # UNDEFINED + 0x00e9: None, # UNDEFINED + 0x00ea: None, # UNDEFINED + 0x00eb: None, # UNDEFINED + 0x00ec: None, # UNDEFINED + 0x00ed: None, # UNDEFINED + 0x00ee: 0x00af, # MACRON + 0x00ef: 0x00b4, # ACUTE ACCENT + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x00b1, # 
PLUS-MINUS SIGN + 0x00f2: 0x2017, # DOUBLE LOW LINE + 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00f4: 0x00b6, # PILCROW SIGN + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x00b8, # CEDILLA + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x00a8, # DIAERESIS + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x00b9, # SUPERSCRIPT ONE + 0x00fc: 0x00b3, # SUPERSCRIPT THREE + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' 
# 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 
0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u05d0' # 0x0080 -> HEBREW LETTER ALEF + u'\u05d1' # 0x0081 -> HEBREW LETTER BET + u'\u05d2' # 0x0082 -> HEBREW LETTER GIMEL + u'\u05d3' # 0x0083 -> HEBREW LETTER DALET + u'\u05d4' # 0x0084 -> HEBREW LETTER HE + u'\u05d5' # 0x0085 -> HEBREW LETTER VAV + u'\u05d6' # 0x0086 -> HEBREW LETTER ZAYIN + u'\u05d7' # 0x0087 -> HEBREW LETTER HET + u'\u05d8' # 0x0088 -> HEBREW LETTER TET + u'\u05d9' # 0x0089 -> HEBREW LETTER YOD + u'\u05da' # 0x008a -> HEBREW LETTER FINAL KAF + u'\u05db' # 0x008b -> HEBREW 
LETTER KAF + u'\u05dc' # 0x008c -> HEBREW LETTER LAMED + u'\u05dd' # 0x008d -> HEBREW LETTER FINAL MEM + u'\u05de' # 0x008e -> HEBREW LETTER MEM + u'\u05df' # 0x008f -> HEBREW LETTER FINAL NUN + u'\u05e0' # 0x0090 -> HEBREW LETTER NUN + u'\u05e1' # 0x0091 -> HEBREW LETTER SAMEKH + u'\u05e2' # 0x0092 -> HEBREW LETTER AYIN + u'\u05e3' # 0x0093 -> HEBREW LETTER FINAL PE + u'\u05e4' # 0x0094 -> HEBREW LETTER PE + u'\u05e5' # 0x0095 -> HEBREW LETTER FINAL TSADI + u'\u05e6' # 0x0096 -> HEBREW LETTER TSADI + u'\u05e7' # 0x0097 -> HEBREW LETTER QOF + u'\u05e8' # 0x0098 -> HEBREW LETTER RESH + u'\u05e9' # 0x0099 -> HEBREW LETTER SHIN + u'\u05ea' # 0x009a -> HEBREW LETTER TAV + u'\ufffe' # 0x009b -> UNDEFINED + u'\xa3' # 0x009c -> POUND SIGN + u'\ufffe' # 0x009d -> UNDEFINED + u'\xd7' # 0x009e -> MULTIPLICATION SIGN + u'\ufffe' # 0x009f -> UNDEFINED + u'\ufffe' # 0x00a0 -> UNDEFINED + u'\ufffe' # 0x00a1 -> UNDEFINED + u'\ufffe' # 0x00a2 -> UNDEFINED + u'\ufffe' # 0x00a3 -> UNDEFINED + u'\ufffe' # 0x00a4 -> UNDEFINED + u'\ufffe' # 0x00a5 -> UNDEFINED + u'\ufffe' # 0x00a6 -> UNDEFINED + u'\ufffe' # 0x00a7 -> UNDEFINED + u'\ufffe' # 0x00a8 -> UNDEFINED + u'\xae' # 0x00a9 -> REGISTERED SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\ufffe' # 0x00ad -> UNDEFINED + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\ufffe' # 0x00b5 -> UNDEFINED + u'\ufffe' # 0x00b6 -> UNDEFINED + u'\ufffe' # 0x00b7 -> UNDEFINED + u'\xa9' # 0x00b8 -> COPYRIGHT SIGN + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS 
DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\xa2' # 0x00bd -> CENT SIGN + u'\xa5' # 0x00be -> YEN SIGN + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\ufffe' # 0x00c6 -> UNDEFINED + u'\ufffe' # 0x00c7 -> UNDEFINED + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa4' # 0x00cf -> CURRENCY SIGN + u'\ufffe' # 0x00d0 -> UNDEFINED + u'\ufffe' # 0x00d1 -> UNDEFINED + u'\ufffe' # 0x00d2 -> UNDEFINED + u'\ufffe' # 0x00d3 -> UNDEFINEDS + u'\ufffe' # 0x00d4 -> UNDEFINED + u'\ufffe' # 0x00d5 -> UNDEFINED + u'\ufffe' # 0x00d6 -> UNDEFINEDE + u'\ufffe' # 0x00d7 -> UNDEFINED + u'\ufffe' # 0x00d8 -> UNDEFINED + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\xa6' # 0x00dd -> BROKEN BAR + u'\ufffe' # 0x00de -> UNDEFINED + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\ufffe' # 0x00e0 -> UNDEFINED + u'\ufffe' # 0x00e1 -> UNDEFINED + u'\ufffe' # 0x00e2 -> UNDEFINED + u'\ufffe' # 0x00e3 -> UNDEFINED + u'\ufffe' # 0x00e4 -> UNDEFINED + u'\ufffe' # 0x00e5 -> UNDEFINED + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\ufffe' # 0x00e7 -> UNDEFINED + u'\ufffe' # 0x00e8 -> 
UNDEFINED + u'\ufffe' # 0x00e9 -> UNDEFINED + u'\ufffe' # 0x00ea -> UNDEFINED + u'\ufffe' # 0x00eb -> UNDEFINED + u'\ufffe' # 0x00ec -> UNDEFINED + u'\ufffe' # 0x00ed -> UNDEFINED + u'\xaf' # 0x00ee -> MACRON + u'\xb4' # 0x00ef -> ACUTE ACCENT + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2017' # 0x00f2 -> DOUBLE LOW LINE + u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS + u'\xb6' # 0x00f4 -> PILCROW SIGN + u'\xa7' # 0x00f5 -> SECTION SIGN + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\xb8' # 0x00f7 -> CEDILLA + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\xa8' # 0x00f9 -> DIAERESIS + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\xb9' # 0x00fb -> SUPERSCRIPT ONE + u'\xb3' # 0x00fc -> SUPERSCRIPT THREE + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 
0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, 
# LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a2: 0x00bd, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a4: 0x00cf, # CURRENCY SIGN + 0x00a5: 0x00be, # YEN SIGN + 0x00a6: 0x00dd, # BROKEN BAR + 0x00a7: 0x00f5, # SECTION SIGN + 0x00a8: 0x00f9, # DIAERESIS + 0x00a9: 0x00b8, # COPYRIGHT SIGN + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 
0x00ac: 0x00aa, # NOT SIGN + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00ae: 0x00a9, # REGISTERED SIGN + 0x00af: 0x00ee, # MACRON + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b3: 0x00fc, # SUPERSCRIPT THREE + 0x00b4: 0x00ef, # ACUTE ACCENT + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b6: 0x00f4, # PILCROW SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00b8: 0x00f7, # CEDILLA + 0x00b9: 0x00fb, # SUPERSCRIPT ONE + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS + 0x00d7: 0x009e, # MULTIPLICATION SIGN + 0x00f7: 0x00f6, # DIVISION SIGN + 0x05d0: 0x0080, # HEBREW LETTER ALEF + 0x05d1: 0x0081, # HEBREW LETTER BET + 0x05d2: 0x0082, # HEBREW LETTER GIMEL + 0x05d3: 0x0083, # HEBREW LETTER DALET + 0x05d4: 0x0084, # HEBREW LETTER HE + 0x05d5: 0x0085, # HEBREW LETTER VAV + 0x05d6: 0x0086, # HEBREW LETTER ZAYIN + 0x05d7: 0x0087, # HEBREW LETTER HET + 0x05d8: 0x0088, # HEBREW LETTER TET + 0x05d9: 0x0089, # HEBREW LETTER YOD + 0x05da: 0x008a, # HEBREW LETTER FINAL KAF + 0x05db: 0x008b, # HEBREW LETTER KAF + 0x05dc: 0x008c, # HEBREW LETTER LAMED + 0x05dd: 0x008d, # HEBREW LETTER FINAL MEM + 0x05de: 0x008e, # HEBREW LETTER MEM + 0x05df: 0x008f, # HEBREW LETTER FINAL NUN + 0x05e0: 0x0090, # HEBREW LETTER NUN + 0x05e1: 0x0091, # HEBREW LETTER SAMEKH + 0x05e2: 0x0092, # HEBREW LETTER AYIN + 0x05e3: 0x0093, # HEBREW LETTER FINAL PE + 0x05e4: 0x0094, # HEBREW LETTER PE + 0x05e5: 0x0095, # HEBREW LETTER FINAL TSADI + 0x05e6: 0x0096, # HEBREW LETTER TSADI + 0x05e7: 0x0097, # HEBREW LETTER QOF + 0x05e8: 0x0098, # HEBREW LETTER RESH + 0x05e9: 0x0099, # HEBREW LETTER SHIN + 0x05ea: 0x009a, # HEBREW LETTER TAV + 0x2017: 0x00f2, # DOUBLE LOW LINE + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 
0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
\ No newline at end of file diff --git a/Lib/encodings/cp857.py b/Lib/encodings/cp857.py index bc26241..507aefa 100644 --- a/Lib/encodings/cp857.py +++ b/Lib/encodings/cp857.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP857.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP857.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,135 +32,650 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 
0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x009f: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE - 0x00a7: 0x011f, # LATIN SMALL LETTER G WITH BREVE - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00b8: 0x00a9, # COPYRIGHT SIGN - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 
0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x00a2, # CENT SIGN - 0x00be: 0x00a5, # YEN SIGN - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00d1: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00d5: None, # UNDEFINED - 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x00a6, # BROKEN BAR - 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O 
WITH GRAVE - 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: None, # UNDEFINED - 0x00e8: 0x00d7, # MULTIPLICATION SIGN - 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00ed: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x00ee: 0x00af, # MACRON - 0x00ef: 0x00b4, # ACUTE ACCENT - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: None, # UNDEFINED - 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x00b8, # CEDILLA - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x00b9, # SUPERSCRIPT ONE - 0x00fc: 0x00b3, # SUPERSCRIPT THREE - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 
0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x009f: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE + 0x00a7: 0x011f, # LATIN SMALL LETTER G WITH BREVE + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00b8: 0x00a9, # COPYRIGHT SIGN + 0x00b9: 0x2563, # BOX 
DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x00a2, # CENT SIGN + 0x00be: 0x00a5, # YEN SIGN + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x00a4, # CURRENCY SIGN + 0x00d0: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00d1: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00d5: None, # UNDEFINED + 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x00a6, # BROKEN BAR + 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: None, # UNDEFINED + 0x00e8: 0x00d7, # MULTIPLICATION SIGN + 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00ed: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x00ee: 0x00af, # MACRON + 0x00ef: 0x00b4, # ACUTE ACCENT + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: None, # UNDEFINED + 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00f4: 0x00b6, # PILCROW SIGN + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x00b8, # CEDILLA + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x00a8, # DIAERESIS + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x00b9, # SUPERSCRIPT ONE + 0x00fc: 0x00b3, # SUPERSCRIPT THREE + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' 
# 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u0131' # 0x008d -> LATIN SMALL LETTER DOTLESS I + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\u0130' # 0x0098 -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'\xd6' # 
0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE + u'\xa3' # 0x009c -> POUND SIGN + u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE + u'\u015e' # 0x009e -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\u015f' # 0x009f -> LATIN SMALL LETTER S WITH CEDILLA + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\u011e' # 0x00a6 -> LATIN CAPITAL LETTER G WITH BREVE + u'\u011f' # 0x00a7 -> LATIN SMALL LETTER G WITH BREVE + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\xae' # 0x00a9 -> REGISTERED SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xa9' # 0x00b8 -> COPYRIGHT SIGN + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\xa2' # 0x00bd -> CENT SIGN + u'\xa5' # 0x00be -> YEN SIGN + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 
-> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE + u'\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa4' # 0x00cf -> CURRENCY SIGN + u'\xba' # 0x00d0 -> MASCULINE ORDINAL INDICATOR + u'\xaa' # 0x00d1 -> FEMININE ORDINAL INDICATOR + u'\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\ufffe' # 0x00d5 -> UNDEFINED + u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\xa6' # 0x00dd -> BROKEN BAR + u'\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE + 
u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\ufffe' # 0x00e7 -> UNDEFINED + u'\xd7' # 0x00e8 -> MULTIPLICATION SIGN + u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE + u'\xff' # 0x00ed -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\xaf' # 0x00ee -> MACRON + u'\xb4' # 0x00ef -> ACUTE ACCENT + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\ufffe' # 0x00f2 -> UNDEFINED + u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS + u'\xb6' # 0x00f4 -> PILCROW SIGN + u'\xa7' # 0x00f5 -> SECTION SIGN + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\xb8' # 0x00f7 -> CEDILLA + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\xa8' # 0x00f9 -> DIAERESIS + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\xb9' # 0x00fb -> SUPERSCRIPT ONE + u'\xb3' # 0x00fc -> SUPERSCRIPT THREE + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 
0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 
0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 
0x00a2: 0x00bd, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a4: 0x00cf, # CURRENCY SIGN + 0x00a5: 0x00be, # YEN SIGN + 0x00a6: 0x00dd, # BROKEN BAR + 0x00a7: 0x00f5, # SECTION SIGN + 0x00a8: 0x00f9, # DIAERESIS + 0x00a9: 0x00b8, # COPYRIGHT SIGN + 0x00aa: 0x00d1, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00ae: 0x00a9, # REGISTERED SIGN + 0x00af: 0x00ee, # MACRON + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b3: 0x00fc, # SUPERSCRIPT THREE + 0x00b4: 0x00ef, # ACUTE ACCENT + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b6: 0x00f4, # PILCROW SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00b8: 0x00f7, # CEDILLA + 0x00b9: 0x00fb, # SUPERSCRIPT ONE + 0x00ba: 0x00d0, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH 
DIAERESIS + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00e8, # MULTIPLICATION SIGN + 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x0097, # 
LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x00ed, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x011e: 0x00a6, # LATIN CAPITAL LETTER G WITH BREVE + 0x011f: 0x00a7, # LATIN SMALL LETTER G WITH BREVE + 0x0130: 0x0098, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0x008d, # LATIN SMALL LETTER DOTLESS I + 0x015e: 0x009e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015f: 0x009f, # LATIN SMALL LETTER S WITH CEDILLA + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 
0x25a0: 0x00fe, # BLACK SQUARE +}
\ No newline at end of file diff --git a/Lib/encodings/cp860.py b/Lib/encodings/cp860.py index ae0fcd8..3a012bf 100644 --- a/Lib/encodings/cp860.py +++ b/Lib/encodings/cp860.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP860.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP860.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x008c: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x008f: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x0092: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX 
- 0x0094: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x0099: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00a2, # CENT SIGN - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 
0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # 
UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x008c: 0x00d4, # LATIN CAPITAL LETTER O WITH 
CIRCUMFLEX + 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x008e: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x008f: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x0092: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x0099: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00a2, # CENT SIGN + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX 
DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 
0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 
0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0x0084 -> LATIN SMALL LETTER A WITH TILDE + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xc1' # 0x0086 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xca' # 0x0089 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xcd' # 0x008b -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xd4' # 0x008c -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE + u'\xc3' # 0x008e -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc2' # 0x008f -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xc0' # 0x0091 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc8' # 0x0092 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0x0094 -> LATIN SMALL LETTER O WITH TILDE + u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE + u'\xda' # 0x0096 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\xcc' # 0x0098 -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd5' # 
0x0099 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xa2' # 0x009b -> CENT SIGN + u'\xa3' # 0x009c -> POUND SIGN + u'\xd9' # 0x009d -> LATIN CAPITAL LETTER U WITH GRAVE + u'\u20a7' # 0x009e -> PESETA SIGN + u'\xd3' # 0x009f -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\xd2' # 0x00a9 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 
u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b1' 
# 0x00e0 -> GREEK SMALL LETTER ALPHA + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA + u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI + u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA + u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU + u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI + u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA + u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA + u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA + u'\u221e' # 0x00ec -> INFINITY + u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI + u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON + u'\u2229' # 0x00ef -> INTERSECTION + u'\u2261' # 0x00f0 -> IDENTICAL TO + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL + u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 
0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN 
CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 
0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a2: 0x009b, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c0: 0x0091, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x0086, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x008f, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x008e, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x0092, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x0089, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cc: 0x0098, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x008b, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00a9, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x009f, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x008c, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x0099, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d9: 0x009d, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x0096, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, 
# LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x0084, # LATIN SMALL LETTER A WITH TILDE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x0094, # LATIN SMALL LETTER O WITH TILDE + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA + 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA + 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA + 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI + 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA + 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA + 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON + 0x03c0: 0x00e3, # GREEK SMALL LETTER PI + 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU + 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x20a7: 0x009e, # PESETA SIGN + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x221e: 0x00ec, # INFINITY + 0x2229: 0x00ef, # INTERSECTION + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2261: 0x00f0, # IDENTICAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2320: 0x00f4, # TOP HALF INTEGRAL + 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS 
LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE 
AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
\ No newline at end of file diff --git a/Lib/encodings/cp861.py b/Lib/encodings/cp861.py index 4d15b81..110ad6c 100644 --- a/Lib/encodings/cp861.py +++ b/Lib/encodings/cp861.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP861.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP861.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00d0, # LATIN CAPITAL LETTER ETH - 0x008c: 0x00f0, # LATIN SMALL LETTER ETH - 0x008d: 0x00de, # LATIN CAPITAL LETTER THORN - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O 
WITH DIAERESIS - 0x0095: 0x00fe, # LATIN SMALL LETTER THORN - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x0098: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00a5: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00a6: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00a7: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX 
DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF 
BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00d0, # LATIN CAPITAL LETTER ETH + 0x008c: 0x00f0, # LATIN SMALL LETTER ETH + 0x008d: 0x00de, # 
LATIN CAPITAL LETTER THORN + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00fe, # LATIN SMALL LETTER THORN + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x0098: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00a5: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00a6: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00a7: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND 
LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS 
VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 
0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xd0' # 0x008b -> LATIN CAPITAL LETTER ETH + u'\xf0' # 0x008c -> LATIN SMALL LETTER ETH + u'\xde' # 0x008d -> LATIN CAPITAL LETTER THORN + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xfe' # 0x0095 -> LATIN SMALL LETTER THORN + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xdd' # 0x0097 -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xfd' # 0x0098 -> LATIN SMALL LETTER Y WITH ACUTE + u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH 
DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE + u'\xa3' # 0x009c -> POUND SIGN + u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE + u'\u20a7' # 0x009e -> PESETA SIGN + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xc1' # 0x00a4 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xcd' # 0x00a5 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xd3' # 0x00a6 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xda' # 0x00a7 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\u2310' # 0x00a9 -> REVERSED NOT SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 
0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b1' # 0x00e0 -> 
GREEK SMALL LETTER ALPHA + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA + u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI + u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA + u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU + u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI + u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA + u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA + u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA + u'\u221e' # 0x00ec -> INFINITY + u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI + u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON + u'\u2229' # 0x00ef -> INTERSECTION + u'\u2261' # 0x00f0 -> IDENTICAL TO + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL + u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # 
DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 
0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN 
SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a3: 0x009c, # POUND SIGN + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c1: 0x00a4, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00cd: 0x00a5, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00d0: 0x008b, # LATIN CAPITAL LETTER ETH + 0x00d3: 0x00a6, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE + 0x00da: 0x00a7, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x0097, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00de: 0x008d, # LATIN CAPITAL LETTER THORN + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN 
SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00f0: 0x008c, # LATIN SMALL LETTER ETH + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x0098, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0x0095, # LATIN SMALL LETTER THORN + 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA + 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA + 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA + 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI + 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA + 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA + 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON + 0x03c0: 0x00e3, # GREEK SMALL LETTER PI + 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU + 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x20a7: 0x009e, # PESETA SIGN + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x221e: 0x00ec, # INFINITY + 0x2229: 0x00ef, # INTERSECTION + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2261: 0x00f0, # IDENTICAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2310: 0x00a9, # REVERSED NOT SIGN + 0x2320: 0x00f4, # TOP HALF INTEGRAL + 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # 
BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 
0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
\ No newline at end of file diff --git a/Lib/encodings/cp862.py b/Lib/encodings/cp862.py index f892002..ba7da2a 100644 --- a/Lib/encodings/cp862.py +++ b/Lib/encodings/cp862.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP862.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP862.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x05d0, # HEBREW LETTER ALEF - 0x0081: 0x05d1, # HEBREW LETTER BET - 0x0082: 0x05d2, # HEBREW LETTER GIMEL - 0x0083: 0x05d3, # HEBREW LETTER DALET - 0x0084: 0x05d4, # HEBREW LETTER HE - 0x0085: 0x05d5, # HEBREW LETTER VAV - 0x0086: 0x05d6, # HEBREW LETTER ZAYIN - 0x0087: 0x05d7, # HEBREW LETTER HET - 0x0088: 0x05d8, # HEBREW LETTER TET - 0x0089: 0x05d9, # HEBREW LETTER YOD - 0x008a: 0x05da, # HEBREW LETTER FINAL KAF - 0x008b: 0x05db, # HEBREW LETTER KAF - 0x008c: 0x05dc, # HEBREW LETTER LAMED - 0x008d: 0x05dd, # HEBREW LETTER FINAL MEM - 0x008e: 0x05de, # HEBREW LETTER MEM - 0x008f: 0x05df, # HEBREW LETTER FINAL NUN - 0x0090: 0x05e0, # HEBREW LETTER NUN - 0x0091: 0x05e1, # HEBREW LETTER SAMEKH - 0x0092: 0x05e2, # HEBREW LETTER AYIN - 0x0093: 0x05e3, # HEBREW LETTER FINAL PE - 0x0094: 0x05e4, # HEBREW LETTER PE - 0x0095: 0x05e5, # HEBREW LETTER FINAL TSADI - 0x0096: 0x05e6, # HEBREW LETTER TSADI - 0x0097: 0x05e7, # HEBREW LETTER QOF - 0x0098: 0x05e8, # HEBREW LETTER RESH - 0x0099: 0x05e9, # HEBREW LETTER SHIN - 0x009a: 0x05ea, # HEBREW LETTER TAV - 0x009b: 0x00a2, # CENT 
SIGN - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00a5, # YEN SIGN - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # 
GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x05d0, # HEBREW LETTER ALEF + 0x0081: 0x05d1, # HEBREW LETTER BET + 0x0082: 0x05d2, # HEBREW LETTER GIMEL + 0x0083: 0x05d3, # HEBREW LETTER DALET + 0x0084: 0x05d4, # HEBREW LETTER HE + 0x0085: 0x05d5, # HEBREW LETTER VAV + 0x0086: 0x05d6, # HEBREW LETTER ZAYIN + 0x0087: 0x05d7, # HEBREW LETTER HET + 0x0088: 0x05d8, # HEBREW LETTER TET + 0x0089: 0x05d9, # HEBREW LETTER YOD + 0x008a: 0x05da, # HEBREW LETTER FINAL KAF + 0x008b: 0x05db, # HEBREW LETTER KAF + 0x008c: 0x05dc, # HEBREW LETTER LAMED + 0x008d: 0x05dd, # HEBREW LETTER FINAL MEM + 0x008e: 0x05de, # HEBREW LETTER MEM + 0x008f: 0x05df, # HEBREW LETTER FINAL NUN + 0x0090: 0x05e0, # HEBREW LETTER NUN + 0x0091: 0x05e1, # HEBREW LETTER SAMEKH + 0x0092: 0x05e2, # HEBREW LETTER AYIN + 0x0093: 0x05e3, # HEBREW LETTER FINAL PE + 0x0094: 0x05e4, # HEBREW LETTER PE + 0x0095: 0x05e5, # HEBREW LETTER FINAL TSADI + 0x0096: 0x05e6, # HEBREW LETTER TSADI + 0x0097: 0x05e7, # HEBREW LETTER QOF + 0x0098: 0x05e8, # HEBREW LETTER RESH + 0x0099: 0x05e9, # HEBREW LETTER SHIN + 0x009a: 0x05ea, # HEBREW LETTER TAV + 0x009b: 0x00a2, # CENT SIGN + 0x009c: 0x00a3, # POUND SIGN + 
0x009d: 0x00a5, # YEN SIGN + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT 
HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 
0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + 
u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN 
CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u05d0' # 0x0080 -> HEBREW LETTER ALEF + u'\u05d1' # 0x0081 -> HEBREW LETTER BET + u'\u05d2' # 0x0082 -> HEBREW LETTER GIMEL + u'\u05d3' # 0x0083 -> HEBREW LETTER DALET + u'\u05d4' # 0x0084 -> HEBREW LETTER HE + u'\u05d5' # 0x0085 -> HEBREW LETTER VAV + u'\u05d6' # 0x0086 -> HEBREW LETTER ZAYIN + u'\u05d7' # 0x0087 -> HEBREW LETTER HET + u'\u05d8' # 0x0088 -> HEBREW LETTER TET + u'\u05d9' # 0x0089 -> HEBREW LETTER YOD + u'\u05da' # 
0x008a -> HEBREW LETTER FINAL KAF + u'\u05db' # 0x008b -> HEBREW LETTER KAF + u'\u05dc' # 0x008c -> HEBREW LETTER LAMED + u'\u05dd' # 0x008d -> HEBREW LETTER FINAL MEM + u'\u05de' # 0x008e -> HEBREW LETTER MEM + u'\u05df' # 0x008f -> HEBREW LETTER FINAL NUN + u'\u05e0' # 0x0090 -> HEBREW LETTER NUN + u'\u05e1' # 0x0091 -> HEBREW LETTER SAMEKH + u'\u05e2' # 0x0092 -> HEBREW LETTER AYIN + u'\u05e3' # 0x0093 -> HEBREW LETTER FINAL PE + u'\u05e4' # 0x0094 -> HEBREW LETTER PE + u'\u05e5' # 0x0095 -> HEBREW LETTER FINAL TSADI + u'\u05e6' # 0x0096 -> HEBREW LETTER TSADI + u'\u05e7' # 0x0097 -> HEBREW LETTER QOF + u'\u05e8' # 0x0098 -> HEBREW LETTER RESH + u'\u05e9' # 0x0099 -> HEBREW LETTER SHIN + u'\u05ea' # 0x009a -> HEBREW LETTER TAV + u'\xa2' # 0x009b -> CENT SIGN + u'\xa3' # 0x009c -> POUND SIGN + u'\xa5' # 0x009d -> YEN SIGN + u'\u20a7' # 0x009e -> PESETA SIGN + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\u2310' # 0x00a9 -> REVERSED NOT SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX 
DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 
u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN) + u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA + u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI + u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA + u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU + u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI + u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA + u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA + u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA + u'\u221e' # 0x00ec -> INFINITY + u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI + u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON + u'\u2229' # 0x00ef -> INTERSECTION + u'\u2261' # 0x00f0 -> IDENTICAL TO + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL + u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### 
Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT 
NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # 
LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a2: 0x009b, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a5: 0x009d, # YEN SIGN + 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x0393: 0x00e2, # 
GREEK CAPITAL LETTER GAMMA + 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA + 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA + 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI + 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA + 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA + 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON + 0x03c0: 0x00e3, # GREEK SMALL LETTER PI + 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU + 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI + 0x05d0: 0x0080, # HEBREW LETTER ALEF + 0x05d1: 0x0081, # HEBREW LETTER BET + 0x05d2: 0x0082, # HEBREW LETTER GIMEL + 0x05d3: 0x0083, # HEBREW LETTER DALET + 0x05d4: 0x0084, # HEBREW LETTER HE + 0x05d5: 0x0085, # HEBREW LETTER VAV + 0x05d6: 0x0086, # HEBREW LETTER ZAYIN + 0x05d7: 0x0087, # HEBREW LETTER HET + 0x05d8: 0x0088, # HEBREW LETTER TET + 0x05d9: 0x0089, # HEBREW LETTER YOD + 0x05da: 0x008a, # HEBREW LETTER FINAL KAF + 0x05db: 0x008b, # HEBREW LETTER KAF + 0x05dc: 0x008c, # HEBREW LETTER LAMED + 0x05dd: 0x008d, # HEBREW LETTER FINAL MEM + 0x05de: 0x008e, # HEBREW LETTER MEM + 0x05df: 0x008f, # HEBREW LETTER FINAL NUN + 0x05e0: 0x0090, # HEBREW LETTER NUN + 0x05e1: 0x0091, # HEBREW LETTER SAMEKH + 0x05e2: 0x0092, # HEBREW LETTER AYIN + 0x05e3: 0x0093, # HEBREW LETTER FINAL PE + 0x05e4: 0x0094, # HEBREW LETTER PE + 0x05e5: 0x0095, # HEBREW LETTER FINAL TSADI + 0x05e6: 0x0096, # HEBREW LETTER TSADI + 0x05e7: 0x0097, # HEBREW LETTER QOF + 0x05e8: 0x0098, # HEBREW LETTER RESH + 0x05e9: 0x0099, # HEBREW LETTER SHIN + 0x05ea: 0x009a, # HEBREW LETTER TAV + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x20a7: 0x009e, # PESETA SIGN + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x221e: 0x00ec, # INFINITY + 0x2229: 0x00ef, # INTERSECTION + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2261: 0x00f0, # IDENTICAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2310: 0x00a9, # REVERSED NOT 
SIGN + 0x2320: 0x00f4, # TOP HALF INTEGRAL + 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 
0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
\ No newline at end of file diff --git a/Lib/encodings/cp863.py b/Lib/encodings/cp863.py index 5f823d1..a19ff53 100644 --- a/Lib/encodings/cp863.py +++ b/Lib/encodings/cp863.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP863.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP863.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00b6, # PILCROW SIGN - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x2017, # DOUBLE LOW LINE - 0x008e: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x008f: 0x00a7, # SECTION SIGN - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x0092: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00cb, # LATIN CAPITAL LETTER E WITH 
DIAERESIS - 0x0095: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00a4, # CURRENCY SIGN - 0x0099: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00a2, # CENT SIGN - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x009e: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00a6, # BROKEN BAR - 0x00a1: 0x00b4, # ACUTE ACCENT - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00a8, # DIAERESIS - 0x00a5: 0x00b8, # CEDILLA - 0x00a6: 0x00b3, # SUPERSCRIPT THREE - 0x00a7: 0x00af, # MACRON - 0x00a8: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP 
SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK 
CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00b6, # PILCROW SIGN + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x2017, # DOUBLE LOW LINE + 0x008e: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x008f: 0x00a7, # SECTION SIGN + 0x0090: 0x00c9, # 
LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x0092: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x0095: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00a4, # CURRENCY SIGN + 0x0099: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00a2, # CENT SIGN + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x009e: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00a6, # BROKEN BAR + 0x00a1: 0x00b4, # ACUTE ACCENT + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00a8, # DIAERESIS + 0x00a5: 0x00b8, # CEDILLA + 0x00a6: 0x00b3, # SUPERSCRIPT THREE + 0x00a7: 0x00af, # MACRON + 0x00a8: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND 
LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 
0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> 
SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xc2' # 0x0084 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xb6' # 0x0086 -> PILCROW SIGN + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u2017' # 0x008d -> DOUBLE LOW LINE + u'\xc0' # 0x008e -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xa7' # 0x008f -> SECTION SIGN + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xc8' # 0x0091 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xca' # 0x0092 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xcb' # 0x0094 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcf' # 0x0095 -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\xa4' # 0x0098 -> CURRENCY SIGN + u'\xd4' # 0x0099 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xdc' 
# 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xa2' # 0x009b -> CENT SIGN + u'\xa3' # 0x009c -> POUND SIGN + u'\xd9' # 0x009d -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xdb' # 0x009e -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xa6' # 0x00a0 -> BROKEN BAR + u'\xb4' # 0x00a1 -> ACUTE ACCENT + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xa8' # 0x00a4 -> DIAERESIS + u'\xb8' # 0x00a5 -> CEDILLA + u'\xb3' # 0x00a6 -> SUPERSCRIPT THREE + u'\xaf' # 0x00a7 -> MACRON + u'\xce' # 0x00a8 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\u2310' # 0x00a9 -> REVERSED NOT SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xbe' # 0x00ad -> VULGAR FRACTION THREE QUARTERS + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX 
DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA + 
u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI + u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA + u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU + u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI + u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA + u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA + u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA + u'\u221e' # 0x00ec -> INFINITY + u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI + u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON + u'\u2229' # 0x00ef -> INTERSECTION + u'\u2261' # 0x00f0 -> IDENTICAL TO + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL + u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE 
CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER 
L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET 
+ 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a2: 0x009b, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a4: 0x0098, # CURRENCY SIGN + 0x00a6: 0x00a0, # BROKEN BAR + 0x00a7: 0x008f, # SECTION SIGN + 0x00a8: 0x00a4, # DIAERESIS + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00af: 0x00a7, # MACRON + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b3: 0x00a6, # SUPERSCRIPT THREE + 0x00b4: 0x00a1, # ACUTE ACCENT + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b6: 0x0086, # PILCROW SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00b8: 0x00a5, # CEDILLA + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00be: 0x00ad, # VULGAR FRACTION THREE QUARTERS + 0x00c0: 0x008e, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c2: 0x0084, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x0091, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x0092, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x0094, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00ce: 0x00a8, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x0095, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d4: 0x0099, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d9: 0x009d, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00db: 0x009e, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E 
WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA + 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA + 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA + 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI + 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA + 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA + 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON + 0x03c0: 0x00e3, # GREEK SMALL LETTER PI + 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU + 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI + 0x2017: 0x008d, # DOUBLE LOW LINE + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x221e: 0x00ec, # INFINITY + 0x2229: 0x00ef, # INTERSECTION + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2261: 0x00f0, # IDENTICAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2310: 0x00a9, # REVERSED NOT SIGN + 0x2320: 0x00f4, # TOP HALF INTEGRAL + 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 
0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 
+ 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
\ No newline at end of file diff --git a/Lib/encodings/cp864.py b/Lib/encodings/cp864.py index ac59217..9480d50 100644 --- a/Lib/encodings/cp864.py +++ b/Lib/encodings/cp864.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP864.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP864.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,134 +32,646 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0025: 0x066a, # ARABIC PERCENT SIGN - 0x0080: 0x00b0, # DEGREE SIGN - 0x0081: 0x00b7, # MIDDLE DOT - 0x0082: 0x2219, # BULLET OPERATOR - 0x0083: 0x221a, # SQUARE ROOT - 0x0084: 0x2592, # MEDIUM SHADE - 0x0085: 0x2500, # FORMS LIGHT HORIZONTAL - 0x0086: 0x2502, # FORMS LIGHT VERTICAL - 0x0087: 0x253c, # FORMS LIGHT VERTICAL AND HORIZONTAL - 0x0088: 0x2524, # FORMS LIGHT VERTICAL AND LEFT - 0x0089: 0x252c, # FORMS LIGHT DOWN AND HORIZONTAL - 0x008a: 0x251c, # FORMS LIGHT VERTICAL AND RIGHT - 0x008b: 0x2534, # FORMS LIGHT UP AND HORIZONTAL - 0x008c: 0x2510, # FORMS LIGHT DOWN AND LEFT - 0x008d: 0x250c, # FORMS LIGHT DOWN AND RIGHT - 0x008e: 0x2514, # FORMS LIGHT UP AND RIGHT - 0x008f: 0x2518, # FORMS LIGHT UP AND LEFT - 0x0090: 0x03b2, # GREEK SMALL BETA - 0x0091: 0x221e, # INFINITY - 0x0092: 0x03c6, # GREEK SMALL PHI - 0x0093: 0x00b1, # PLUS-OR-MINUS SIGN - 0x0094: 0x00bd, # FRACTION 1/2 - 0x0095: 0x00bc, # FRACTION 1/4 - 0x0096: 0x2248, # ALMOST EQUAL TO - 0x0097: 0x00ab, # LEFT POINTING GUILLEMET - 0x0098: 0x00bb, # RIGHT POINTING GUILLEMET - 0x0099: 0xfef7, # ARABIC LIGATURE 
LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM - 0x009a: 0xfef8, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM - 0x009b: None, # UNDEFINED - 0x009c: None, # UNDEFINED - 0x009d: 0xfefb, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM - 0x009e: 0xfefc, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM - 0x009f: None, # UNDEFINED - 0x00a1: 0x00ad, # SOFT HYPHEN - 0x00a2: 0xfe82, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM - 0x00a5: 0xfe84, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM - 0x00a6: None, # UNDEFINED - 0x00a7: None, # UNDEFINED - 0x00a8: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM - 0x00a9: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM - 0x00aa: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM - 0x00ab: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM - 0x00ac: 0x060c, # ARABIC COMMA - 0x00ad: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM - 0x00ae: 0xfea1, # ARABIC LETTER HAH ISOLATED FORM - 0x00af: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM - 0x00b0: 0x0660, # ARABIC-INDIC DIGIT ZERO - 0x00b1: 0x0661, # ARABIC-INDIC DIGIT ONE - 0x00b2: 0x0662, # ARABIC-INDIC DIGIT TWO - 0x00b3: 0x0663, # ARABIC-INDIC DIGIT THREE - 0x00b4: 0x0664, # ARABIC-INDIC DIGIT FOUR - 0x00b5: 0x0665, # ARABIC-INDIC DIGIT FIVE - 0x00b6: 0x0666, # ARABIC-INDIC DIGIT SIX - 0x00b7: 0x0667, # ARABIC-INDIC DIGIT SEVEN - 0x00b8: 0x0668, # ARABIC-INDIC DIGIT EIGHT - 0x00b9: 0x0669, # ARABIC-INDIC DIGIT NINE - 0x00ba: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM - 0x00bb: 0x061b, # ARABIC SEMICOLON - 0x00bc: 0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM - 0x00bd: 0xfeb5, # ARABIC LETTER SHEEN ISOLATED FORM - 0x00be: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM - 0x00bf: 0x061f, # ARABIC QUESTION MARK - 0x00c0: 0x00a2, # CENT SIGN - 0x00c1: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM - 0x00c2: 0xfe81, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM - 0x00c3: 0xfe83, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM - 0x00c4: 0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM - 0x00c5: 0xfeca, # 
ARABIC LETTER AIN FINAL FORM - 0x00c6: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM - 0x00c7: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM - 0x00c8: 0xfe91, # ARABIC LETTER BEH INITIAL FORM - 0x00c9: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED FORM - 0x00ca: 0xfe97, # ARABIC LETTER TEH INITIAL FORM - 0x00cb: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM - 0x00cc: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM - 0x00cd: 0xfea3, # ARABIC LETTER HAH INITIAL FORM - 0x00ce: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM - 0x00cf: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM - 0x00d0: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM - 0x00d1: 0xfead, # ARABIC LETTER REH ISOLATED FORM - 0x00d2: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM - 0x00d3: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM - 0x00d4: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM - 0x00d5: 0xfebb, # ARABIC LETTER SAD INITIAL FORM - 0x00d6: 0xfebf, # ARABIC LETTER DAD INITIAL FORM - 0x00d7: 0xfec1, # ARABIC LETTER TAH ISOLATED FORM - 0x00d8: 0xfec5, # ARABIC LETTER ZAH ISOLATED FORM - 0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM - 0x00da: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM - 0x00db: 0x00a6, # BROKEN VERTICAL BAR - 0x00dc: 0x00ac, # NOT SIGN - 0x00dd: 0x00f7, # DIVISION SIGN - 0x00de: 0x00d7, # MULTIPLICATION SIGN - 0x00df: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM - 0x00e0: 0x0640, # ARABIC TATWEEL - 0x00e1: 0xfed3, # ARABIC LETTER FEH INITIAL FORM - 0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM - 0x00e3: 0xfedb, # ARABIC LETTER KAF INITIAL FORM - 0x00e4: 0xfedf, # ARABIC LETTER LAM INITIAL FORM - 0x00e5: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM - 0x00e6: 0xfee7, # ARABIC LETTER NOON INITIAL FORM - 0x00e7: 0xfeeb, # ARABIC LETTER HEH INITIAL FORM - 0x00e8: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM - 0x00e9: 0xfeef, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM - 0x00ea: 0xfef3, # ARABIC LETTER YEH INITIAL FORM - 0x00eb: 0xfebd, # ARABIC LETTER DAD ISOLATED FORM - 0x00ec: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM 
- 0x00ed: 0xfece, # ARABIC LETTER GHAIN FINAL FORM - 0x00ee: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM - 0x00ef: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM - 0x00f0: 0xfe7d, # ARABIC SHADDA MEDIAL FORM - 0x00f1: 0x0651, # ARABIC SHADDAH - 0x00f2: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM - 0x00f3: 0xfee9, # ARABIC LETTER HEH ISOLATED FORM - 0x00f4: 0xfeec, # ARABIC LETTER HEH MEDIAL FORM - 0x00f5: 0xfef0, # ARABIC LETTER ALEF MAKSURA FINAL FORM - 0x00f6: 0xfef2, # ARABIC LETTER YEH FINAL FORM - 0x00f7: 0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM - 0x00f8: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM - 0x00f9: 0xfef5, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM - 0x00fa: 0xfef6, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM - 0x00fb: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM - 0x00fc: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM - 0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: None, # UNDEFINED + 0x0025: 0x066a, # ARABIC PERCENT SIGN + 0x0080: 0x00b0, # DEGREE SIGN + 0x0081: 0x00b7, # MIDDLE DOT + 0x0082: 0x2219, # BULLET OPERATOR + 0x0083: 0x221a, # SQUARE ROOT + 0x0084: 0x2592, # MEDIUM SHADE + 0x0085: 0x2500, # FORMS LIGHT HORIZONTAL + 0x0086: 0x2502, # FORMS LIGHT VERTICAL + 0x0087: 0x253c, # FORMS LIGHT VERTICAL AND HORIZONTAL + 0x0088: 0x2524, # FORMS LIGHT VERTICAL AND LEFT + 0x0089: 0x252c, # FORMS LIGHT DOWN AND HORIZONTAL + 0x008a: 0x251c, # FORMS LIGHT VERTICAL AND RIGHT + 0x008b: 0x2534, # FORMS LIGHT UP AND HORIZONTAL + 0x008c: 0x2510, # FORMS LIGHT DOWN AND LEFT + 0x008d: 0x250c, # FORMS LIGHT DOWN AND RIGHT + 0x008e: 0x2514, # FORMS LIGHT UP AND RIGHT + 0x008f: 0x2518, # FORMS LIGHT UP AND LEFT + 0x0090: 0x03b2, # GREEK SMALL BETA + 0x0091: 0x221e, # INFINITY + 0x0092: 0x03c6, # GREEK SMALL PHI + 0x0093: 0x00b1, # PLUS-OR-MINUS SIGN + 0x0094: 0x00bd, # FRACTION 1/2 + 0x0095: 0x00bc, # FRACTION 1/4 + 0x0096: 0x2248, # ALMOST EQUAL TO + 0x0097: 0x00ab, # LEFT POINTING GUILLEMET + 
0x0098: 0x00bb, # RIGHT POINTING GUILLEMET + 0x0099: 0xfef7, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM + 0x009a: 0xfef8, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM + 0x009b: None, # UNDEFINED + 0x009c: None, # UNDEFINED + 0x009d: 0xfefb, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM + 0x009e: 0xfefc, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM + 0x009f: None, # UNDEFINED + 0x00a1: 0x00ad, # SOFT HYPHEN + 0x00a2: 0xfe82, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM + 0x00a5: 0xfe84, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM + 0x00a6: None, # UNDEFINED + 0x00a7: None, # UNDEFINED + 0x00a8: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM + 0x00a9: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM + 0x00aa: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM + 0x00ab: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM + 0x00ac: 0x060c, # ARABIC COMMA + 0x00ad: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM + 0x00ae: 0xfea1, # ARABIC LETTER HAH ISOLATED FORM + 0x00af: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM + 0x00b0: 0x0660, # ARABIC-INDIC DIGIT ZERO + 0x00b1: 0x0661, # ARABIC-INDIC DIGIT ONE + 0x00b2: 0x0662, # ARABIC-INDIC DIGIT TWO + 0x00b3: 0x0663, # ARABIC-INDIC DIGIT THREE + 0x00b4: 0x0664, # ARABIC-INDIC DIGIT FOUR + 0x00b5: 0x0665, # ARABIC-INDIC DIGIT FIVE + 0x00b6: 0x0666, # ARABIC-INDIC DIGIT SIX + 0x00b7: 0x0667, # ARABIC-INDIC DIGIT SEVEN + 0x00b8: 0x0668, # ARABIC-INDIC DIGIT EIGHT + 0x00b9: 0x0669, # ARABIC-INDIC DIGIT NINE + 0x00ba: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM + 0x00bb: 0x061b, # ARABIC SEMICOLON + 0x00bc: 0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM + 0x00bd: 0xfeb5, # ARABIC LETTER SHEEN ISOLATED FORM + 0x00be: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM + 0x00bf: 0x061f, # ARABIC QUESTION MARK + 0x00c0: 0x00a2, # CENT SIGN + 0x00c1: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM + 0x00c2: 0xfe81, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + 0x00c3: 0xfe83, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM + 0x00c4: 
0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + 0x00c5: 0xfeca, # ARABIC LETTER AIN FINAL FORM + 0x00c6: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + 0x00c7: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM + 0x00c8: 0xfe91, # ARABIC LETTER BEH INITIAL FORM + 0x00c9: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED FORM + 0x00ca: 0xfe97, # ARABIC LETTER TEH INITIAL FORM + 0x00cb: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM + 0x00cc: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM + 0x00cd: 0xfea3, # ARABIC LETTER HAH INITIAL FORM + 0x00ce: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM + 0x00cf: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM + 0x00d0: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM + 0x00d1: 0xfead, # ARABIC LETTER REH ISOLATED FORM + 0x00d2: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM + 0x00d3: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM + 0x00d4: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM + 0x00d5: 0xfebb, # ARABIC LETTER SAD INITIAL FORM + 0x00d6: 0xfebf, # ARABIC LETTER DAD INITIAL FORM + 0x00d7: 0xfec1, # ARABIC LETTER TAH ISOLATED FORM + 0x00d8: 0xfec5, # ARABIC LETTER ZAH ISOLATED FORM + 0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM + 0x00da: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM + 0x00db: 0x00a6, # BROKEN VERTICAL BAR + 0x00dc: 0x00ac, # NOT SIGN + 0x00dd: 0x00f7, # DIVISION SIGN + 0x00de: 0x00d7, # MULTIPLICATION SIGN + 0x00df: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM + 0x00e0: 0x0640, # ARABIC TATWEEL + 0x00e1: 0xfed3, # ARABIC LETTER FEH INITIAL FORM + 0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM + 0x00e3: 0xfedb, # ARABIC LETTER KAF INITIAL FORM + 0x00e4: 0xfedf, # ARABIC LETTER LAM INITIAL FORM + 0x00e5: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM + 0x00e6: 0xfee7, # ARABIC LETTER NOON INITIAL FORM + 0x00e7: 0xfeeb, # ARABIC LETTER HEH INITIAL FORM + 0x00e8: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM + 0x00e9: 0xfeef, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM + 0x00ea: 0xfef3, # ARABIC LETTER YEH INITIAL FORM + 0x00eb: 0xfebd, # 
ARABIC LETTER DAD ISOLATED FORM + 0x00ec: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM + 0x00ed: 0xfece, # ARABIC LETTER GHAIN FINAL FORM + 0x00ee: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM + 0x00ef: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM + 0x00f0: 0xfe7d, # ARABIC SHADDA MEDIAL FORM + 0x00f1: 0x0651, # ARABIC SHADDAH + 0x00f2: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM + 0x00f3: 0xfee9, # ARABIC LETTER HEH ISOLATED FORM + 0x00f4: 0xfeec, # ARABIC LETTER HEH MEDIAL FORM + 0x00f5: 0xfef0, # ARABIC LETTER ALEF MAKSURA FINAL FORM + 0x00f6: 0xfef2, # ARABIC LETTER YEH FINAL FORM + 0x00f7: 0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM + 0x00f8: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM + 0x00f9: 0xfef5, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM + 0x00fa: 0xfef6, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM + 0x00fb: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM + 0x00fc: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM + 0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: None, # UNDEFINED }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' 
# 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'\u066a' # 0x0025 -> ARABIC PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xb0' # 0x0080 -> DEGREE SIGN + u'\xb7' # 0x0081 -> MIDDLE DOT + u'\u2219' # 0x0082 -> BULLET OPERATOR + u'\u221a' # 0x0083 -> SQUARE ROOT + u'\u2592' # 0x0084 -> MEDIUM SHADE + u'\u2500' # 0x0085 -> FORMS LIGHT HORIZONTAL + u'\u2502' # 0x0086 -> FORMS LIGHT VERTICAL + u'\u253c' # 0x0087 -> FORMS LIGHT VERTICAL AND HORIZONTAL + u'\u2524' # 0x0088 -> FORMS LIGHT VERTICAL AND LEFT + u'\u252c' # 0x0089 -> FORMS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x008a -> FORMS LIGHT VERTICAL AND RIGHT + u'\u2534' # 0x008b -> FORMS LIGHT UP AND HORIZONTAL + u'\u2510' # 0x008c -> FORMS LIGHT DOWN AND LEFT + u'\u250c' # 0x008d -> FORMS LIGHT DOWN AND RIGHT + u'\u2514' # 0x008e -> FORMS LIGHT UP AND RIGHT + u'\u2518' # 0x008f -> FORMS LIGHT UP AND LEFT + u'\u03b2' # 0x0090 -> GREEK SMALL BETA + u'\u221e' # 0x0091 -> INFINITY + u'\u03c6' # 0x0092 -> GREEK SMALL PHI + u'\xb1' # 0x0093 -> PLUS-OR-MINUS SIGN + u'\xbd' # 0x0094 -> FRACTION 1/2 + u'\xbc' # 0x0095 -> FRACTION 1/4 + u'\u2248' # 0x0096 -> ALMOST EQUAL TO + u'\xab' # 0x0097 -> LEFT POINTING GUILLEMET + u'\xbb' # 0x0098 -> RIGHT POINTING GUILLEMET + u'\ufef7' # 0x0099 -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM + u'\ufef8' # 0x009a -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM + u'\ufffe' # 0x009b -> UNDEFINED + u'\ufffe' # 0x009c -> UNDEFINED + u'\ufefb' # 0x009d -> ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM + u'\ufefc' # 
0x009e -> ARABIC LIGATURE LAM WITH ALEF FINAL FORM + u'\ufffe' # 0x009f -> UNDEFINED + u'\xa0' # 0x00a0 -> NON-BREAKING SPACE + u'\xad' # 0x00a1 -> SOFT HYPHEN + u'\ufe82' # 0x00a2 -> ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\ufe84' # 0x00a5 -> ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM + u'\ufffe' # 0x00a6 -> UNDEFINED + u'\ufffe' # 0x00a7 -> UNDEFINED + u'\ufe8e' # 0x00a8 -> ARABIC LETTER ALEF FINAL FORM + u'\ufe8f' # 0x00a9 -> ARABIC LETTER BEH ISOLATED FORM + u'\ufe95' # 0x00aa -> ARABIC LETTER TEH ISOLATED FORM + u'\ufe99' # 0x00ab -> ARABIC LETTER THEH ISOLATED FORM + u'\u060c' # 0x00ac -> ARABIC COMMA + u'\ufe9d' # 0x00ad -> ARABIC LETTER JEEM ISOLATED FORM + u'\ufea1' # 0x00ae -> ARABIC LETTER HAH ISOLATED FORM + u'\ufea5' # 0x00af -> ARABIC LETTER KHAH ISOLATED FORM + u'\u0660' # 0x00b0 -> ARABIC-INDIC DIGIT ZERO + u'\u0661' # 0x00b1 -> ARABIC-INDIC DIGIT ONE + u'\u0662' # 0x00b2 -> ARABIC-INDIC DIGIT TWO + u'\u0663' # 0x00b3 -> ARABIC-INDIC DIGIT THREE + u'\u0664' # 0x00b4 -> ARABIC-INDIC DIGIT FOUR + u'\u0665' # 0x00b5 -> ARABIC-INDIC DIGIT FIVE + u'\u0666' # 0x00b6 -> ARABIC-INDIC DIGIT SIX + u'\u0667' # 0x00b7 -> ARABIC-INDIC DIGIT SEVEN + u'\u0668' # 0x00b8 -> ARABIC-INDIC DIGIT EIGHT + u'\u0669' # 0x00b9 -> ARABIC-INDIC DIGIT NINE + u'\ufed1' # 0x00ba -> ARABIC LETTER FEH ISOLATED FORM + u'\u061b' # 0x00bb -> ARABIC SEMICOLON + u'\ufeb1' # 0x00bc -> ARABIC LETTER SEEN ISOLATED FORM + u'\ufeb5' # 0x00bd -> ARABIC LETTER SHEEN ISOLATED FORM + u'\ufeb9' # 0x00be -> ARABIC LETTER SAD ISOLATED FORM + u'\u061f' # 0x00bf -> ARABIC QUESTION MARK + u'\xa2' # 0x00c0 -> CENT SIGN + u'\ufe80' # 0x00c1 -> ARABIC LETTER HAMZA ISOLATED FORM + u'\ufe81' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + u'\ufe83' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM + u'\ufe85' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + u'\ufeca' # 0x00c5 -> 
ARABIC LETTER AIN FINAL FORM + u'\ufe8b' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + u'\ufe8d' # 0x00c7 -> ARABIC LETTER ALEF ISOLATED FORM + u'\ufe91' # 0x00c8 -> ARABIC LETTER BEH INITIAL FORM + u'\ufe93' # 0x00c9 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM + u'\ufe97' # 0x00ca -> ARABIC LETTER TEH INITIAL FORM + u'\ufe9b' # 0x00cb -> ARABIC LETTER THEH INITIAL FORM + u'\ufe9f' # 0x00cc -> ARABIC LETTER JEEM INITIAL FORM + u'\ufea3' # 0x00cd -> ARABIC LETTER HAH INITIAL FORM + u'\ufea7' # 0x00ce -> ARABIC LETTER KHAH INITIAL FORM + u'\ufea9' # 0x00cf -> ARABIC LETTER DAL ISOLATED FORM + u'\ufeab' # 0x00d0 -> ARABIC LETTER THAL ISOLATED FORM + u'\ufead' # 0x00d1 -> ARABIC LETTER REH ISOLATED FORM + u'\ufeaf' # 0x00d2 -> ARABIC LETTER ZAIN ISOLATED FORM + u'\ufeb3' # 0x00d3 -> ARABIC LETTER SEEN INITIAL FORM + u'\ufeb7' # 0x00d4 -> ARABIC LETTER SHEEN INITIAL FORM + u'\ufebb' # 0x00d5 -> ARABIC LETTER SAD INITIAL FORM + u'\ufebf' # 0x00d6 -> ARABIC LETTER DAD INITIAL FORM + u'\ufec1' # 0x00d7 -> ARABIC LETTER TAH ISOLATED FORM + u'\ufec5' # 0x00d8 -> ARABIC LETTER ZAH ISOLATED FORM + u'\ufecb' # 0x00d9 -> ARABIC LETTER AIN INITIAL FORM + u'\ufecf' # 0x00da -> ARABIC LETTER GHAIN INITIAL FORM + u'\xa6' # 0x00db -> BROKEN VERTICAL BAR + u'\xac' # 0x00dc -> NOT SIGN + u'\xf7' # 0x00dd -> DIVISION SIGN + u'\xd7' # 0x00de -> MULTIPLICATION SIGN + u'\ufec9' # 0x00df -> ARABIC LETTER AIN ISOLATED FORM + u'\u0640' # 0x00e0 -> ARABIC TATWEEL + u'\ufed3' # 0x00e1 -> ARABIC LETTER FEH INITIAL FORM + u'\ufed7' # 0x00e2 -> ARABIC LETTER QAF INITIAL FORM + u'\ufedb' # 0x00e3 -> ARABIC LETTER KAF INITIAL FORM + u'\ufedf' # 0x00e4 -> ARABIC LETTER LAM INITIAL FORM + u'\ufee3' # 0x00e5 -> ARABIC LETTER MEEM INITIAL FORM + u'\ufee7' # 0x00e6 -> ARABIC LETTER NOON INITIAL FORM + u'\ufeeb' # 0x00e7 -> ARABIC LETTER HEH INITIAL FORM + u'\ufeed' # 0x00e8 -> ARABIC LETTER WAW ISOLATED FORM + u'\ufeef' # 0x00e9 -> ARABIC LETTER ALEF MAKSURA ISOLATED FORM + u'\ufef3' # 
0x00ea -> ARABIC LETTER YEH INITIAL FORM + u'\ufebd' # 0x00eb -> ARABIC LETTER DAD ISOLATED FORM + u'\ufecc' # 0x00ec -> ARABIC LETTER AIN MEDIAL FORM + u'\ufece' # 0x00ed -> ARABIC LETTER GHAIN FINAL FORM + u'\ufecd' # 0x00ee -> ARABIC LETTER GHAIN ISOLATED FORM + u'\ufee1' # 0x00ef -> ARABIC LETTER MEEM ISOLATED FORM + u'\ufe7d' # 0x00f0 -> ARABIC SHADDA MEDIAL FORM + u'\u0651' # 0x00f1 -> ARABIC SHADDAH + u'\ufee5' # 0x00f2 -> ARABIC LETTER NOON ISOLATED FORM + u'\ufee9' # 0x00f3 -> ARABIC LETTER HEH ISOLATED FORM + u'\ufeec' # 0x00f4 -> ARABIC LETTER HEH MEDIAL FORM + u'\ufef0' # 0x00f5 -> ARABIC LETTER ALEF MAKSURA FINAL FORM + u'\ufef2' # 0x00f6 -> ARABIC LETTER YEH FINAL FORM + u'\ufed0' # 0x00f7 -> ARABIC LETTER GHAIN MEDIAL FORM + u'\ufed5' # 0x00f8 -> ARABIC LETTER QAF ISOLATED FORM + u'\ufef5' # 0x00f9 -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM + u'\ufef6' # 0x00fa -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM + u'\ufedd' # 0x00fb -> ARABIC LETTER LAM ISOLATED FORM + u'\ufed9' # 0x00fc -> ARABIC LETTER KAF ISOLATED FORM + u'\ufef1' # 0x00fd -> ARABIC LETTER YEH ISOLATED FORM + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\ufffe' # 0x00ff -> UNDEFINED +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, 
# DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # 
LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 
0x00a0, # NON-BREAKING SPACE + 0x00a2: 0x00c0, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a6: 0x00db, # BROKEN VERTICAL BAR + 0x00ab: 0x0097, # LEFT POINTING GUILLEMET + 0x00ac: 0x00dc, # NOT SIGN + 0x00ad: 0x00a1, # SOFT HYPHEN + 0x00b0: 0x0080, # DEGREE SIGN + 0x00b1: 0x0093, # PLUS-OR-MINUS SIGN + 0x00b7: 0x0081, # MIDDLE DOT + 0x00bb: 0x0098, # RIGHT POINTING GUILLEMET + 0x00bc: 0x0095, # FRACTION 1/4 + 0x00bd: 0x0094, # FRACTION 1/2 + 0x00d7: 0x00de, # MULTIPLICATION SIGN + 0x00f7: 0x00dd, # DIVISION SIGN + 0x03b2: 0x0090, # GREEK SMALL BETA + 0x03c6: 0x0092, # GREEK SMALL PHI + 0x060c: 0x00ac, # ARABIC COMMA + 0x061b: 0x00bb, # ARABIC SEMICOLON + 0x061f: 0x00bf, # ARABIC QUESTION MARK + 0x0640: 0x00e0, # ARABIC TATWEEL + 0x0651: 0x00f1, # ARABIC SHADDAH + 0x0660: 0x00b0, # ARABIC-INDIC DIGIT ZERO + 0x0661: 0x00b1, # ARABIC-INDIC DIGIT ONE + 0x0662: 0x00b2, # ARABIC-INDIC DIGIT TWO + 0x0663: 0x00b3, # ARABIC-INDIC DIGIT THREE + 0x0664: 0x00b4, # ARABIC-INDIC DIGIT FOUR + 0x0665: 0x00b5, # ARABIC-INDIC DIGIT FIVE + 0x0666: 0x00b6, # ARABIC-INDIC DIGIT SIX + 0x0667: 0x00b7, # ARABIC-INDIC DIGIT SEVEN + 0x0668: 0x00b8, # ARABIC-INDIC DIGIT EIGHT + 0x0669: 0x00b9, # ARABIC-INDIC DIGIT NINE + 0x066a: 0x0025, # ARABIC PERCENT SIGN + 0x2219: 0x0082, # BULLET OPERATOR + 0x221a: 0x0083, # SQUARE ROOT + 0x221e: 0x0091, # INFINITY + 0x2248: 0x0096, # ALMOST EQUAL TO + 0x2500: 0x0085, # FORMS LIGHT HORIZONTAL + 0x2502: 0x0086, # FORMS LIGHT VERTICAL + 0x250c: 0x008d, # FORMS LIGHT DOWN AND RIGHT + 0x2510: 0x008c, # FORMS LIGHT DOWN AND LEFT + 0x2514: 0x008e, # FORMS LIGHT UP AND RIGHT + 0x2518: 0x008f, # FORMS LIGHT UP AND LEFT + 0x251c: 0x008a, # FORMS LIGHT VERTICAL AND RIGHT + 0x2524: 0x0088, # FORMS LIGHT VERTICAL AND LEFT + 0x252c: 0x0089, # FORMS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x008b, # FORMS LIGHT UP AND HORIZONTAL + 0x253c: 0x0087, # FORMS LIGHT VERTICAL AND HORIZONTAL + 0x2592: 0x0084, # MEDIUM SHADE + 0x25a0: 0x00fe, 
# BLACK SQUARE + 0xfe7d: 0x00f0, # ARABIC SHADDA MEDIAL FORM + 0xfe80: 0x00c1, # ARABIC LETTER HAMZA ISOLATED FORM + 0xfe81: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + 0xfe82: 0x00a2, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM + 0xfe83: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM + 0xfe84: 0x00a5, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM + 0xfe85: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + 0xfe8b: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + 0xfe8d: 0x00c7, # ARABIC LETTER ALEF ISOLATED FORM + 0xfe8e: 0x00a8, # ARABIC LETTER ALEF FINAL FORM + 0xfe8f: 0x00a9, # ARABIC LETTER BEH ISOLATED FORM + 0xfe91: 0x00c8, # ARABIC LETTER BEH INITIAL FORM + 0xfe93: 0x00c9, # ARABIC LETTER TEH MARBUTA ISOLATED FORM + 0xfe95: 0x00aa, # ARABIC LETTER TEH ISOLATED FORM + 0xfe97: 0x00ca, # ARABIC LETTER TEH INITIAL FORM + 0xfe99: 0x00ab, # ARABIC LETTER THEH ISOLATED FORM + 0xfe9b: 0x00cb, # ARABIC LETTER THEH INITIAL FORM + 0xfe9d: 0x00ad, # ARABIC LETTER JEEM ISOLATED FORM + 0xfe9f: 0x00cc, # ARABIC LETTER JEEM INITIAL FORM + 0xfea1: 0x00ae, # ARABIC LETTER HAH ISOLATED FORM + 0xfea3: 0x00cd, # ARABIC LETTER HAH INITIAL FORM + 0xfea5: 0x00af, # ARABIC LETTER KHAH ISOLATED FORM + 0xfea7: 0x00ce, # ARABIC LETTER KHAH INITIAL FORM + 0xfea9: 0x00cf, # ARABIC LETTER DAL ISOLATED FORM + 0xfeab: 0x00d0, # ARABIC LETTER THAL ISOLATED FORM + 0xfead: 0x00d1, # ARABIC LETTER REH ISOLATED FORM + 0xfeaf: 0x00d2, # ARABIC LETTER ZAIN ISOLATED FORM + 0xfeb1: 0x00bc, # ARABIC LETTER SEEN ISOLATED FORM + 0xfeb3: 0x00d3, # ARABIC LETTER SEEN INITIAL FORM + 0xfeb5: 0x00bd, # ARABIC LETTER SHEEN ISOLATED FORM + 0xfeb7: 0x00d4, # ARABIC LETTER SHEEN INITIAL FORM + 0xfeb9: 0x00be, # ARABIC LETTER SAD ISOLATED FORM + 0xfebb: 0x00d5, # ARABIC LETTER SAD INITIAL FORM + 0xfebd: 0x00eb, # ARABIC LETTER DAD ISOLATED FORM + 0xfebf: 0x00d6, # ARABIC LETTER DAD INITIAL FORM + 0xfec1: 0x00d7, # ARABIC LETTER TAH ISOLATED FORM + 
0xfec5: 0x00d8, # ARABIC LETTER ZAH ISOLATED FORM + 0xfec9: 0x00df, # ARABIC LETTER AIN ISOLATED FORM + 0xfeca: 0x00c5, # ARABIC LETTER AIN FINAL FORM + 0xfecb: 0x00d9, # ARABIC LETTER AIN INITIAL FORM + 0xfecc: 0x00ec, # ARABIC LETTER AIN MEDIAL FORM + 0xfecd: 0x00ee, # ARABIC LETTER GHAIN ISOLATED FORM + 0xfece: 0x00ed, # ARABIC LETTER GHAIN FINAL FORM + 0xfecf: 0x00da, # ARABIC LETTER GHAIN INITIAL FORM + 0xfed0: 0x00f7, # ARABIC LETTER GHAIN MEDIAL FORM + 0xfed1: 0x00ba, # ARABIC LETTER FEH ISOLATED FORM + 0xfed3: 0x00e1, # ARABIC LETTER FEH INITIAL FORM + 0xfed5: 0x00f8, # ARABIC LETTER QAF ISOLATED FORM + 0xfed7: 0x00e2, # ARABIC LETTER QAF INITIAL FORM + 0xfed9: 0x00fc, # ARABIC LETTER KAF ISOLATED FORM + 0xfedb: 0x00e3, # ARABIC LETTER KAF INITIAL FORM + 0xfedd: 0x00fb, # ARABIC LETTER LAM ISOLATED FORM + 0xfedf: 0x00e4, # ARABIC LETTER LAM INITIAL FORM + 0xfee1: 0x00ef, # ARABIC LETTER MEEM ISOLATED FORM + 0xfee3: 0x00e5, # ARABIC LETTER MEEM INITIAL FORM + 0xfee5: 0x00f2, # ARABIC LETTER NOON ISOLATED FORM + 0xfee7: 0x00e6, # ARABIC LETTER NOON INITIAL FORM + 0xfee9: 0x00f3, # ARABIC LETTER HEH ISOLATED FORM + 0xfeeb: 0x00e7, # ARABIC LETTER HEH INITIAL FORM + 0xfeec: 0x00f4, # ARABIC LETTER HEH MEDIAL FORM + 0xfeed: 0x00e8, # ARABIC LETTER WAW ISOLATED FORM + 0xfeef: 0x00e9, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM + 0xfef0: 0x00f5, # ARABIC LETTER ALEF MAKSURA FINAL FORM + 0xfef1: 0x00fd, # ARABIC LETTER YEH ISOLATED FORM + 0xfef2: 0x00f6, # ARABIC LETTER YEH FINAL FORM + 0xfef3: 0x00ea, # ARABIC LETTER YEH INITIAL FORM + 0xfef5: 0x00f9, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM + 0xfef6: 0x00fa, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM + 0xfef7: 0x0099, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM + 0xfef8: 0x009a, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM + 0xfefb: 0x009d, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM + 0xfefc: 0x009e, # ARABIC LIGATURE LAM WITH ALEF FINAL 
FORM +}
\ No newline at end of file diff --git a/Lib/encodings/cp865.py b/Lib/encodings/cp865.py index b4f88f6..9d2f5c2 100644 --- a/Lib/encodings/cp865.py +++ b/Lib/encodings/cp865.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP865.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP865.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 
0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00a4, # CURRENCY SIGN - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX 
DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF 
BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX 
+ 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00a4, # CURRENCY SIGN + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT 
DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL 
DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> 
BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\xd6' # 
0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE + u'\xa3' # 0x009c -> POUND SIGN + u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE + u'\u20a7' # 0x009e -> PESETA SIGN + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\u2310' # 0x00a9 -> REVERSED NOT SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xa4' # 0x00af -> CURRENCY SIGN + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> 
BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b1' # 0x00e0 -> GREEK 
SMALL LETTER ALPHA + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA + u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI + u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA + u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU + u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI + u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA + u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA + u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA + u'\u221e' # 0x00ec -> INFINITY + u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI + u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON + u'\u2229' # 0x00ef -> INTERSECTION + u'\u2261' # 0x00f0 -> IDENTICAL TO + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL + u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA 
LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 
0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL 
LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a3: 0x009c, # POUND SIGN + 0x00a4: 0x00af, # CURRENCY SIGN + 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE + 
0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA + 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA + 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA + 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI + 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA + 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA + 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON + 0x03c0: 0x00e3, # GREEK SMALL LETTER PI + 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU + 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x20a7: 0x009e, # PESETA SIGN + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x221e: 0x00ec, # INFINITY + 0x2229: 0x00ef, # INTERSECTION + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2261: 0x00f0, # IDENTICAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2310: 0x00a9, # REVERSED NOT SIGN + 0x2320: 0x00f4, # TOP HALF INTEGRAL + 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX 
DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, 
# BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
\ No newline at end of file diff --git a/Lib/encodings/cp866.py b/Lib/encodings/cp866.py index a6a26e5..0061ff6 100644 --- a/Lib/encodings/cp866.py +++ b/Lib/encodings/cp866.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP866.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP866.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES - 0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x0097: 
0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A - 0x00a1: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00a2: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00a3: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00a4: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00a5: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00a6: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00a7: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00a8: 0x0438, # CYRILLIC SMALL LETTER I - 0x00a9: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00aa: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00ab: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00ac: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00ad: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00ae: 0x043e, # CYRILLIC SMALL LETTER O - 0x00af: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00e3: 0x0443, # CYRILLIC SMALL LETTER U - 0x00e4: 0x0444, 
# CYRILLIC SMALL LETTER EF - 0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00ed: 0x044d, # CYRILLIC SMALL LETTER E - 0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU - 0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA - 0x00f0: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO - 0x00f2: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x00f3: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x00f4: 0x0407, # CYRILLIC CAPITAL LETTER YI - 0x00f5: 0x0457, # CYRILLIC SMALL LETTER YI - 0x00f6: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U - 0x00f7: 0x045e, # CYRILLIC SMALL LETTER SHORT U - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x2116, # NUMERO SIGN - 0x00fd: 0x00a4, # CURRENCY SIGN - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A + 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE + 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE + 0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE + 0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE + 0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE + 0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE + 0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE + 0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I + 0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I + 0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA + 0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL + 0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM + 0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN + 0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O + 0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE + 0x0090: 0x0420, # CYRILLIC CAPITAL 
LETTER ER + 0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES + 0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE + 0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U + 0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF + 0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA + 0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE + 0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE + 0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA + 0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA + 0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU + 0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E + 0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU + 0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA + 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A + 0x00a1: 0x0431, # CYRILLIC SMALL LETTER BE + 0x00a2: 0x0432, # CYRILLIC SMALL LETTER VE + 0x00a3: 0x0433, # CYRILLIC SMALL LETTER GHE + 0x00a4: 0x0434, # CYRILLIC SMALL LETTER DE + 0x00a5: 0x0435, # CYRILLIC SMALL LETTER IE + 0x00a6: 0x0436, # CYRILLIC SMALL LETTER ZHE + 0x00a7: 0x0437, # CYRILLIC SMALL LETTER ZE + 0x00a8: 0x0438, # CYRILLIC SMALL LETTER I + 0x00a9: 0x0439, # CYRILLIC SMALL LETTER SHORT I + 0x00aa: 0x043a, # CYRILLIC SMALL LETTER KA + 0x00ab: 0x043b, # CYRILLIC SMALL LETTER EL + 0x00ac: 0x043c, # CYRILLIC SMALL LETTER EM + 0x00ad: 0x043d, # CYRILLIC SMALL LETTER EN + 0x00ae: 0x043e, # CYRILLIC SMALL LETTER O + 0x00af: 0x043f, # CYRILLIC SMALL LETTER PE + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX 
DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 
0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER + 0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES + 0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE + 0x00e3: 0x0443, # CYRILLIC SMALL LETTER U + 0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF + 0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA + 0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE + 0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE + 0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA + 0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA + 0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN + 0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU + 0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN + 0x00ed: 0x044d, # CYRILLIC SMALL LETTER E + 0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU + 0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA + 0x00f0: 0x0401, # CYRILLIC CAPITAL LETTER IO + 0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO + 0x00f2: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x00f3: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x00f4: 0x0407, # CYRILLIC CAPITAL LETTER YI + 0x00f5: 0x0457, # CYRILLIC SMALL LETTER YI + 0x00f6: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U + 0x00f7: 0x045e, # CYRILLIC SMALL LETTER SHORT U + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x2116, # NUMERO SIGN + 0x00fd: 0x00a4, # CURRENCY SIGN + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM 
FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u0410' # 0x0080 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0x0081 -> CYRILLIC CAPITAL LETTER BE + u'\u0412' # 0x0082 -> CYRILLIC CAPITAL LETTER VE + u'\u0413' # 0x0083 -> CYRILLIC CAPITAL LETTER GHE + u'\u0414' # 0x0084 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0x0085 -> CYRILLIC CAPITAL LETTER IE + u'\u0416' # 0x0086 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0417' # 0x0087 -> CYRILLIC CAPITAL LETTER ZE + u'\u0418' # 0x0088 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0x0089 -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0x008a -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0x008b -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0x008c -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0x008d -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0x008e -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0x008f -> CYRILLIC CAPITAL LETTER PE + u'\u0420' # 0x0090 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0x0091 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0x0092 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0x0093 -> CYRILLIC CAPITAL LETTER U + u'\u0424' # 0x0094 -> CYRILLIC CAPITAL LETTER EF + u'\u0425' # 0x0095 -> CYRILLIC CAPITAL LETTER HA + u'\u0426' # 0x0096 -> CYRILLIC CAPITAL LETTER TSE + u'\u0427' # 0x0097 -> CYRILLIC CAPITAL LETTER CHE + u'\u0428' # 0x0098 -> CYRILLIC CAPITAL LETTER SHA + u'\u0429' # 0x0099 -> CYRILLIC CAPITAL LETTER SHCHA + u'\u042a' # 0x009a -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\u042b' # 0x009b -> CYRILLIC 
CAPITAL LETTER YERU + u'\u042c' # 0x009c -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042d' # 0x009d -> CYRILLIC CAPITAL LETTER E + u'\u042e' # 0x009e -> CYRILLIC CAPITAL LETTER YU + u'\u042f' # 0x009f -> CYRILLIC CAPITAL LETTER YA + u'\u0430' # 0x00a0 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0x00a1 -> CYRILLIC SMALL LETTER BE + u'\u0432' # 0x00a2 -> CYRILLIC SMALL LETTER VE + u'\u0433' # 0x00a3 -> CYRILLIC SMALL LETTER GHE + u'\u0434' # 0x00a4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0x00a5 -> CYRILLIC SMALL LETTER IE + u'\u0436' # 0x00a6 -> CYRILLIC SMALL LETTER ZHE + u'\u0437' # 0x00a7 -> CYRILLIC SMALL LETTER ZE + u'\u0438' # 0x00a8 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0x00a9 -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0x00aa -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0x00ab -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0x00ac -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0x00ad -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0x00ae -> CYRILLIC SMALL LETTER O + u'\u043f' # 0x00af -> CYRILLIC SMALL LETTER PE + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> 
BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u0440' # 0x00e0 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0x00e1 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0x00e2 -> CYRILLIC SMALL LETTER TE 
+ u'\u0443' # 0x00e3 -> CYRILLIC SMALL LETTER U + u'\u0444' # 0x00e4 -> CYRILLIC SMALL LETTER EF + u'\u0445' # 0x00e5 -> CYRILLIC SMALL LETTER HA + u'\u0446' # 0x00e6 -> CYRILLIC SMALL LETTER TSE + u'\u0447' # 0x00e7 -> CYRILLIC SMALL LETTER CHE + u'\u0448' # 0x00e8 -> CYRILLIC SMALL LETTER SHA + u'\u0449' # 0x00e9 -> CYRILLIC SMALL LETTER SHCHA + u'\u044a' # 0x00ea -> CYRILLIC SMALL LETTER HARD SIGN + u'\u044b' # 0x00eb -> CYRILLIC SMALL LETTER YERU + u'\u044c' # 0x00ec -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044d' # 0x00ed -> CYRILLIC SMALL LETTER E + u'\u044e' # 0x00ee -> CYRILLIC SMALL LETTER YU + u'\u044f' # 0x00ef -> CYRILLIC SMALL LETTER YA + u'\u0401' # 0x00f0 -> CYRILLIC CAPITAL LETTER IO + u'\u0451' # 0x00f1 -> CYRILLIC SMALL LETTER IO + u'\u0404' # 0x00f2 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\u0454' # 0x00f3 -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\u0407' # 0x00f4 -> CYRILLIC CAPITAL LETTER YI + u'\u0457' # 0x00f5 -> CYRILLIC SMALL LETTER YI + u'\u040e' # 0x00f6 -> CYRILLIC CAPITAL LETTER SHORT U + u'\u045e' # 0x00f7 -> CYRILLIC SMALL LETTER SHORT U + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u2116' # 0x00fc -> NUMERO SIGN + u'\xa4' # 0x00fd -> CURRENCY SIGN + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 
0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN 
CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 
0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a4: 0x00fd, # CURRENCY SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x0401: 0x00f0, # CYRILLIC CAPITAL LETTER IO + 0x0404: 0x00f2, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0407: 0x00f4, # CYRILLIC CAPITAL LETTER YI + 0x040e: 0x00f6, # CYRILLIC CAPITAL LETTER SHORT U + 0x0410: 0x0080, # CYRILLIC CAPITAL LETTER A + 0x0411: 0x0081, # CYRILLIC CAPITAL LETTER BE + 0x0412: 0x0082, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0x0083, # CYRILLIC CAPITAL LETTER GHE + 0x0414: 0x0084, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0x0085, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0x0086, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0x0087, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0x0088, # CYRILLIC CAPITAL LETTER I + 0x0419: 0x0089, # CYRILLIC CAPITAL LETTER SHORT I + 0x041a: 0x008a, # CYRILLIC CAPITAL LETTER KA + 0x041b: 0x008b, # CYRILLIC CAPITAL LETTER EL + 0x041c: 0x008c, # CYRILLIC CAPITAL LETTER EM + 0x041d: 0x008d, # CYRILLIC CAPITAL LETTER EN + 0x041e: 0x008e, # CYRILLIC CAPITAL LETTER O + 0x041f: 0x008f, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0x0090, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0x0091, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0x0092, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0x0093, # CYRILLIC CAPITAL LETTER U + 0x0424: 0x0094, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0x0095, # CYRILLIC CAPITAL LETTER HA + 0x0426: 0x0096, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0x0097, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0x0098, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0x0099, # CYRILLIC CAPITAL LETTER SHCHA + 0x042a: 0x009a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042b: 0x009b, # CYRILLIC CAPITAL LETTER YERU + 0x042c: 0x009c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042d: 0x009d, # CYRILLIC CAPITAL LETTER E + 0x042e: 0x009e, # CYRILLIC 
CAPITAL LETTER YU + 0x042f: 0x009f, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0x00a0, # CYRILLIC SMALL LETTER A + 0x0431: 0x00a1, # CYRILLIC SMALL LETTER BE + 0x0432: 0x00a2, # CYRILLIC SMALL LETTER VE + 0x0433: 0x00a3, # CYRILLIC SMALL LETTER GHE + 0x0434: 0x00a4, # CYRILLIC SMALL LETTER DE + 0x0435: 0x00a5, # CYRILLIC SMALL LETTER IE + 0x0436: 0x00a6, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0x00a7, # CYRILLIC SMALL LETTER ZE + 0x0438: 0x00a8, # CYRILLIC SMALL LETTER I + 0x0439: 0x00a9, # CYRILLIC SMALL LETTER SHORT I + 0x043a: 0x00aa, # CYRILLIC SMALL LETTER KA + 0x043b: 0x00ab, # CYRILLIC SMALL LETTER EL + 0x043c: 0x00ac, # CYRILLIC SMALL LETTER EM + 0x043d: 0x00ad, # CYRILLIC SMALL LETTER EN + 0x043e: 0x00ae, # CYRILLIC SMALL LETTER O + 0x043f: 0x00af, # CYRILLIC SMALL LETTER PE + 0x0440: 0x00e0, # CYRILLIC SMALL LETTER ER + 0x0441: 0x00e1, # CYRILLIC SMALL LETTER ES + 0x0442: 0x00e2, # CYRILLIC SMALL LETTER TE + 0x0443: 0x00e3, # CYRILLIC SMALL LETTER U + 0x0444: 0x00e4, # CYRILLIC SMALL LETTER EF + 0x0445: 0x00e5, # CYRILLIC SMALL LETTER HA + 0x0446: 0x00e6, # CYRILLIC SMALL LETTER TSE + 0x0447: 0x00e7, # CYRILLIC SMALL LETTER CHE + 0x0448: 0x00e8, # CYRILLIC SMALL LETTER SHA + 0x0449: 0x00e9, # CYRILLIC SMALL LETTER SHCHA + 0x044a: 0x00ea, # CYRILLIC SMALL LETTER HARD SIGN + 0x044b: 0x00eb, # CYRILLIC SMALL LETTER YERU + 0x044c: 0x00ec, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044d: 0x00ed, # CYRILLIC SMALL LETTER E + 0x044e: 0x00ee, # CYRILLIC SMALL LETTER YU + 0x044f: 0x00ef, # CYRILLIC SMALL LETTER YA + 0x0451: 0x00f1, # CYRILLIC SMALL LETTER IO + 0x0454: 0x00f3, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0457: 0x00f5, # CYRILLIC SMALL LETTER YI + 0x045e: 0x00f7, # CYRILLIC SMALL LETTER SHORT U + 0x2116: 0x00fc, # NUMERO SIGN + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX 
DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 
0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
\ No newline at end of file diff --git a/Lib/encodings/cp869.py b/Lib/encodings/cp869.py index 21b0ab8..8d5caea 100644 --- a/Lib/encodings/cp869.py +++ b/Lib/encodings/cp869.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP869.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP869.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,645 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: None, # UNDEFINED - 0x0081: None, # UNDEFINED - 0x0082: None, # UNDEFINED - 0x0083: None, # UNDEFINED - 0x0084: None, # UNDEFINED - 0x0085: None, # UNDEFINED - 0x0086: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0087: None, # UNDEFINED - 0x0088: 0x00b7, # MIDDLE DOT - 0x0089: 0x00ac, # NOT SIGN - 0x008a: 0x00a6, # BROKEN BAR - 0x008b: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x008c: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x008d: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x008e: 0x2015, # HORIZONTAL BAR - 0x008f: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x0090: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x0091: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x0092: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x0093: None, # UNDEFINED - 0x0094: None, # UNDEFINED - 0x0095: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x0096: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x0097: 0x00a9, # COPYRIGHT SIGN - 0x0098: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0099: 0x00b2, # SUPERSCRIPT TWO - 0x009a: 0x00b3, # 
SUPERSCRIPT THREE - 0x009b: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x009e: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS - 0x009f: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS - 0x00a0: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x00a1: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x00a2: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x00a3: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x00a4: 0x0391, # GREEK CAPITAL LETTER ALPHA - 0x00a5: 0x0392, # GREEK CAPITAL LETTER BETA - 0x00a6: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00a7: 0x0394, # GREEK CAPITAL LETTER DELTA - 0x00a8: 0x0395, # GREEK CAPITAL LETTER EPSILON - 0x00a9: 0x0396, # GREEK CAPITAL LETTER ZETA - 0x00aa: 0x0397, # GREEK CAPITAL LETTER ETA - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ad: 0x0399, # GREEK CAPITAL LETTER IOTA - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x039a, # GREEK CAPITAL LETTER KAPPA - 0x00b6: 0x039b, # GREEK CAPITAL LETTER LAMDA - 0x00b7: 0x039c, # GREEK CAPITAL LETTER MU - 0x00b8: 0x039d, # GREEK CAPITAL LETTER NU - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x039e, # GREEK CAPITAL LETTER XI - 0x00be: 0x039f, # GREEK CAPITAL LETTER OMICRON - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT 
DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x03a0, # GREEK CAPITAL LETTER PI - 0x00c7: 0x03a1, # GREEK CAPITAL LETTER RHO - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00d0: 0x03a4, # GREEK CAPITAL LETTER TAU - 0x00d1: 0x03a5, # GREEK CAPITAL LETTER UPSILON - 0x00d2: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00d3: 0x03a7, # GREEK CAPITAL LETTER CHI - 0x00d4: 0x03a8, # GREEK CAPITAL LETTER PSI - 0x00d5: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00d6: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00d7: 0x03b2, # GREEK SMALL LETTER BETA - 0x00d8: 0x03b3, # GREEK SMALL LETTER GAMMA - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00de: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b6, # GREEK SMALL LETTER ZETA - 0x00e1: 0x03b7, # GREEK SMALL LETTER ETA - 0x00e2: 0x03b8, # GREEK SMALL LETTER THETA - 0x00e3: 0x03b9, # GREEK SMALL LETTER IOTA - 0x00e4: 0x03ba, # GREEK SMALL LETTER KAPPA - 0x00e5: 0x03bb, # GREEK SMALL LETTER LAMDA - 0x00e6: 0x03bc, # GREEK SMALL LETTER MU - 0x00e7: 0x03bd, # GREEK SMALL LETTER NU - 0x00e8: 0x03be, # GREEK SMALL LETTER XI - 0x00e9: 0x03bf, # GREEK SMALL LETTER OMICRON - 0x00ea: 0x03c0, # GREEK SMALL LETTER PI - 0x00eb: 0x03c1, # GREEK SMALL LETTER RHO - 0x00ec: 0x03c3, # GREEK SMALL 
LETTER SIGMA - 0x00ed: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA - 0x00ee: 0x03c4, # GREEK SMALL LETTER TAU - 0x00ef: 0x0384, # GREEK TONOS - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x03c5, # GREEK SMALL LETTER UPSILON - 0x00f3: 0x03c6, # GREEK SMALL LETTER PHI - 0x00f4: 0x03c7, # GREEK SMALL LETTER CHI - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x03c8, # GREEK SMALL LETTER PSI - 0x00f7: 0x0385, # GREEK DIALYTIKA TONOS - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x03c9, # GREEK SMALL LETTER OMEGA - 0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x00fc: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: None, # UNDEFINED + 0x0081: None, # UNDEFINED + 0x0082: None, # UNDEFINED + 0x0083: None, # UNDEFINED + 0x0084: None, # UNDEFINED + 0x0085: None, # UNDEFINED + 0x0086: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0087: None, # UNDEFINED + 0x0088: 0x00b7, # MIDDLE DOT + 0x0089: 0x00ac, # NOT SIGN + 0x008a: 0x00a6, # BROKEN BAR + 0x008b: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x008c: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x008d: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x008e: 0x2015, # HORIZONTAL BAR + 0x008f: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x0090: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x0091: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x0092: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x0093: None, # UNDEFINED + 0x0094: None, # UNDEFINED + 0x0095: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x0096: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x0097: 0x00a9, # COPYRIGHT SIGN + 0x0098: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0099: 0x00b2, # SUPERSCRIPT TWO + 0x009a: 0x00b3, # SUPERSCRIPT THREE + 0x009b: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS + 
0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x009e: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS + 0x009f: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS + 0x00a0: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x00a1: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x00a2: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x00a3: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x00a4: 0x0391, # GREEK CAPITAL LETTER ALPHA + 0x00a5: 0x0392, # GREEK CAPITAL LETTER BETA + 0x00a6: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00a7: 0x0394, # GREEK CAPITAL LETTER DELTA + 0x00a8: 0x0395, # GREEK CAPITAL LETTER EPSILON + 0x00a9: 0x0396, # GREEK CAPITAL LETTER ZETA + 0x00aa: 0x0397, # GREEK CAPITAL LETTER ETA + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ad: 0x0399, # GREEK CAPITAL LETTER IOTA + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x039a, # GREEK CAPITAL LETTER KAPPA + 0x00b6: 0x039b, # GREEK CAPITAL LETTER LAMDA + 0x00b7: 0x039c, # GREEK CAPITAL LETTER MU + 0x00b8: 0x039d, # GREEK CAPITAL LETTER NU + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x039e, # GREEK CAPITAL LETTER XI + 0x00be: 0x039f, # GREEK CAPITAL LETTER OMICRON + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x03a0, # GREEK CAPITAL LETTER PI + 0x00c7: 0x03a1, # GREEK CAPITAL LETTER RHO + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00d0: 0x03a4, # GREEK CAPITAL LETTER TAU + 0x00d1: 0x03a5, # GREEK CAPITAL LETTER UPSILON + 0x00d2: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00d3: 0x03a7, # GREEK CAPITAL LETTER CHI + 0x00d4: 0x03a8, # GREEK CAPITAL LETTER PSI + 0x00d5: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00d6: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00d7: 0x03b2, # GREEK SMALL LETTER BETA + 0x00d8: 0x03b3, # GREEK SMALL LETTER GAMMA + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00de: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b6, # GREEK SMALL LETTER ZETA + 0x00e1: 0x03b7, # GREEK SMALL LETTER ETA + 0x00e2: 0x03b8, # GREEK SMALL LETTER THETA + 0x00e3: 0x03b9, # GREEK SMALL LETTER IOTA + 0x00e4: 0x03ba, # GREEK SMALL LETTER KAPPA + 0x00e5: 0x03bb, # GREEK SMALL LETTER LAMDA + 0x00e6: 0x03bc, # GREEK SMALL LETTER MU + 0x00e7: 0x03bd, # GREEK SMALL LETTER NU + 0x00e8: 0x03be, # GREEK SMALL LETTER XI + 0x00e9: 0x03bf, # GREEK SMALL LETTER OMICRON + 0x00ea: 0x03c0, # GREEK SMALL LETTER PI + 0x00eb: 0x03c1, # GREEK SMALL LETTER RHO + 0x00ec: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00ed: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA + 0x00ee: 0x03c4, # 
GREEK SMALL LETTER TAU + 0x00ef: 0x0384, # GREEK TONOS + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x03c5, # GREEK SMALL LETTER UPSILON + 0x00f3: 0x03c6, # GREEK SMALL LETTER PHI + 0x00f4: 0x03c7, # GREEK SMALL LETTER CHI + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x03c8, # GREEK SMALL LETTER PSI + 0x00f7: 0x0385, # GREEK DIALYTIKA TONOS + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x00a8, # DIAERESIS + 0x00fa: 0x03c9, # GREEK SMALL LETTER OMEGA + 0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x00fc: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' 
# 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 
0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\ufffe' # 0x0080 -> UNDEFINED + u'\ufffe' # 0x0081 -> UNDEFINED + u'\ufffe' # 0x0082 -> UNDEFINED + u'\ufffe' # 0x0083 -> UNDEFINED + u'\ufffe' # 0x0084 -> UNDEFINED + u'\ufffe' # 0x0085 -> UNDEFINED + u'\u0386' # 0x0086 -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\ufffe' # 0x0087 -> UNDEFINED + u'\xb7' # 0x0088 -> MIDDLE DOT + u'\xac' # 0x0089 -> NOT SIGN + u'\xa6' # 0x008a -> BROKEN BAR + u'\u2018' # 0x008b -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x008c -> RIGHT SINGLE QUOTATION MARK + 
u'\u0388' # 0x008d -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u2015' # 0x008e -> HORIZONTAL BAR + u'\u0389' # 0x008f -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\u038a' # 0x0090 -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\u03aa' # 0x0091 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\u038c' # 0x0092 -> GREEK CAPITAL LETTER OMICRON WITH TONOS + u'\ufffe' # 0x0093 -> UNDEFINED + u'\ufffe' # 0x0094 -> UNDEFINED + u'\u038e' # 0x0095 -> GREEK CAPITAL LETTER UPSILON WITH TONOS + u'\u03ab' # 0x0096 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'\xa9' # 0x0097 -> COPYRIGHT SIGN + u'\u038f' # 0x0098 -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'\xb2' # 0x0099 -> SUPERSCRIPT TWO + u'\xb3' # 0x009a -> SUPERSCRIPT THREE + u'\u03ac' # 0x009b -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\xa3' # 0x009c -> POUND SIGN + u'\u03ad' # 0x009d -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0x009e -> GREEK SMALL LETTER ETA WITH TONOS + u'\u03af' # 0x009f -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03ca' # 0x00a0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u0390' # 0x00a1 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + u'\u03cc' # 0x00a2 -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u03cd' # 0x00a3 -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u0391' # 0x00a4 -> GREEK CAPITAL LETTER ALPHA + u'\u0392' # 0x00a5 -> GREEK CAPITAL LETTER BETA + u'\u0393' # 0x00a6 -> GREEK CAPITAL LETTER GAMMA + u'\u0394' # 0x00a7 -> GREEK CAPITAL LETTER DELTA + u'\u0395' # 0x00a8 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0x00a9 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0x00aa -> GREEK CAPITAL LETTER ETA + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\u0398' # 0x00ac -> GREEK CAPITAL LETTER THETA + u'\u0399' # 0x00ad -> GREEK CAPITAL LETTER IOTA + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE 
+ u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u039a' # 0x00b5 -> GREEK CAPITAL LETTER KAPPA + u'\u039b' # 0x00b6 -> GREEK CAPITAL LETTER LAMDA + u'\u039c' # 0x00b7 -> GREEK CAPITAL LETTER MU + u'\u039d' # 0x00b8 -> GREEK CAPITAL LETTER NU + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u039e' # 0x00bd -> GREEK CAPITAL LETTER XI + u'\u039f' # 0x00be -> GREEK CAPITAL LETTER OMICRON + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u03a0' # 0x00c6 -> GREEK CAPITAL LETTER PI + u'\u03a1' # 0x00c7 -> GREEK CAPITAL LETTER RHO + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u03a3' # 0x00cf -> GREEK CAPITAL LETTER SIGMA + u'\u03a4' # 0x00d0 -> GREEK CAPITAL LETTER TAU + u'\u03a5' # 0x00d1 -> GREEK CAPITAL LETTER UPSILON + u'\u03a6' # 0x00d2 -> GREEK CAPITAL LETTER PHI + u'\u03a7' # 0x00d3 -> GREEK CAPITAL LETTER CHI + u'\u03a8' # 0x00d4 -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0x00d5 -> GREEK CAPITAL LETTER OMEGA + u'\u03b1' # 0x00d6 -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 
0x00d7 -> GREEK SMALL LETTER BETA + u'\u03b3' # 0x00d8 -> GREEK SMALL LETTER GAMMA + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u03b4' # 0x00dd -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0x00de -> GREEK SMALL LETTER EPSILON + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b6' # 0x00e0 -> GREEK SMALL LETTER ZETA + u'\u03b7' # 0x00e1 -> GREEK SMALL LETTER ETA + u'\u03b8' # 0x00e2 -> GREEK SMALL LETTER THETA + u'\u03b9' # 0x00e3 -> GREEK SMALL LETTER IOTA + u'\u03ba' # 0x00e4 -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0x00e5 -> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0x00e6 -> GREEK SMALL LETTER MU + u'\u03bd' # 0x00e7 -> GREEK SMALL LETTER NU + u'\u03be' # 0x00e8 -> GREEK SMALL LETTER XI + u'\u03bf' # 0x00e9 -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0x00ea -> GREEK SMALL LETTER PI + u'\u03c1' # 0x00eb -> GREEK SMALL LETTER RHO + u'\u03c3' # 0x00ec -> GREEK SMALL LETTER SIGMA + u'\u03c2' # 0x00ed -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c4' # 0x00ee -> GREEK SMALL LETTER TAU + u'\u0384' # 0x00ef -> GREEK TONOS + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u03c5' # 0x00f2 -> GREEK SMALL LETTER UPSILON + u'\u03c6' # 0x00f3 -> GREEK SMALL LETTER PHI + u'\u03c7' # 0x00f4 -> GREEK SMALL LETTER CHI + u'\xa7' # 0x00f5 -> SECTION SIGN + u'\u03c8' # 0x00f6 -> GREEK SMALL LETTER PSI + u'\u0385' # 0x00f7 -> GREEK DIALYTIKA TONOS + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\xa8' # 0x00f9 -> DIAERESIS + u'\u03c9' # 0x00fa -> GREEK SMALL LETTER OMEGA + u'\u03cb' # 0x00fb -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u03b0' # 0x00fc -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + u'\u03ce' # 0x00fd -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map 
= { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # 
LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # 
LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a3: 0x009c, # POUND SIGN + 0x00a6: 0x008a, # BROKEN BAR + 0x00a7: 0x00f5, # SECTION SIGN + 0x00a8: 0x00f9, # DIAERESIS + 0x00a9: 0x0097, # COPYRIGHT SIGN + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x0089, # NOT SIGN + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x0099, # SUPERSCRIPT TWO + 0x00b3: 0x009a, # SUPERSCRIPT THREE + 0x00b7: 0x0088, # MIDDLE DOT + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x0384: 0x00ef, # GREEK TONOS + 0x0385: 0x00f7, # GREEK DIALYTIKA TONOS + 0x0386: 0x0086, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0388: 0x008d, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x0389: 0x008f, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x038a: 0x0090, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x038c: 0x0092, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x038e: 0x0095, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x038f: 0x0098, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0390: 0x00a1, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x0391: 0x00a4, # GREEK CAPITAL LETTER ALPHA + 0x0392: 0x00a5, # GREEK CAPITAL LETTER BETA + 0x0393: 0x00a6, # GREEK CAPITAL 
LETTER GAMMA + 0x0394: 0x00a7, # GREEK CAPITAL LETTER DELTA + 0x0395: 0x00a8, # GREEK CAPITAL LETTER EPSILON + 0x0396: 0x00a9, # GREEK CAPITAL LETTER ZETA + 0x0397: 0x00aa, # GREEK CAPITAL LETTER ETA + 0x0398: 0x00ac, # GREEK CAPITAL LETTER THETA + 0x0399: 0x00ad, # GREEK CAPITAL LETTER IOTA + 0x039a: 0x00b5, # GREEK CAPITAL LETTER KAPPA + 0x039b: 0x00b6, # GREEK CAPITAL LETTER LAMDA + 0x039c: 0x00b7, # GREEK CAPITAL LETTER MU + 0x039d: 0x00b8, # GREEK CAPITAL LETTER NU + 0x039e: 0x00bd, # GREEK CAPITAL LETTER XI + 0x039f: 0x00be, # GREEK CAPITAL LETTER OMICRON + 0x03a0: 0x00c6, # GREEK CAPITAL LETTER PI + 0x03a1: 0x00c7, # GREEK CAPITAL LETTER RHO + 0x03a3: 0x00cf, # GREEK CAPITAL LETTER SIGMA + 0x03a4: 0x00d0, # GREEK CAPITAL LETTER TAU + 0x03a5: 0x00d1, # GREEK CAPITAL LETTER UPSILON + 0x03a6: 0x00d2, # GREEK CAPITAL LETTER PHI + 0x03a7: 0x00d3, # GREEK CAPITAL LETTER CHI + 0x03a8: 0x00d4, # GREEK CAPITAL LETTER PSI + 0x03a9: 0x00d5, # GREEK CAPITAL LETTER OMEGA + 0x03aa: 0x0091, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x03ab: 0x0096, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x03ac: 0x009b, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x03ad: 0x009d, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x03ae: 0x009e, # GREEK SMALL LETTER ETA WITH TONOS + 0x03af: 0x009f, # GREEK SMALL LETTER IOTA WITH TONOS + 0x03b0: 0x00fc, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x03b1: 0x00d6, # GREEK SMALL LETTER ALPHA + 0x03b2: 0x00d7, # GREEK SMALL LETTER BETA + 0x03b3: 0x00d8, # GREEK SMALL LETTER GAMMA + 0x03b4: 0x00dd, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00de, # GREEK SMALL LETTER EPSILON + 0x03b6: 0x00e0, # GREEK SMALL LETTER ZETA + 0x03b7: 0x00e1, # GREEK SMALL LETTER ETA + 0x03b8: 0x00e2, # GREEK SMALL LETTER THETA + 0x03b9: 0x00e3, # GREEK SMALL LETTER IOTA + 0x03ba: 0x00e4, # GREEK SMALL LETTER KAPPA + 0x03bb: 0x00e5, # GREEK SMALL LETTER LAMDA + 0x03bc: 0x00e6, # GREEK SMALL LETTER MU + 0x03bd: 0x00e7, # GREEK SMALL LETTER NU + 0x03be: 0x00e8, # 
GREEK SMALL LETTER XI + 0x03bf: 0x00e9, # GREEK SMALL LETTER OMICRON + 0x03c0: 0x00ea, # GREEK SMALL LETTER PI + 0x03c1: 0x00eb, # GREEK SMALL LETTER RHO + 0x03c2: 0x00ed, # GREEK SMALL LETTER FINAL SIGMA + 0x03c3: 0x00ec, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00ee, # GREEK SMALL LETTER TAU + 0x03c5: 0x00f2, # GREEK SMALL LETTER UPSILON + 0x03c6: 0x00f3, # GREEK SMALL LETTER PHI + 0x03c7: 0x00f4, # GREEK SMALL LETTER CHI + 0x03c8: 0x00f6, # GREEK SMALL LETTER PSI + 0x03c9: 0x00fa, # GREEK SMALL LETTER OMEGA + 0x03ca: 0x00a0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x03cb: 0x00fb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03cc: 0x00a2, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03cd: 0x00a3, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03ce: 0x00fd, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x2015: 0x008e, # HORIZONTAL BAR + 0x2018: 0x008b, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x008c, # RIGHT SINGLE QUOTATION MARK + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX 
DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
\ No newline at end of file diff --git a/Lib/encodings/cp874.py b/Lib/encodings/cp874.py index 01c5eb6..b703cbe 100644 --- a/Lib/encodings/cp874.py +++ b/Lib/encodings/cp874.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP874.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP874.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,135 +32,622 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: None, # UNDEFINED - 0x0082: None, # UNDEFINED - 0x0083: None, # UNDEFINED - 0x0084: None, # UNDEFINED - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: None, # UNDEFINED - 0x0087: None, # UNDEFINED - 0x0088: None, # UNDEFINED - 0x0089: None, # UNDEFINED - 0x008a: None, # UNDEFINED - 0x008b: None, # UNDEFINED - 0x008c: None, # UNDEFINED - 0x008d: None, # UNDEFINED - 0x008e: None, # UNDEFINED - 0x008f: None, # UNDEFINED - 0x0090: None, # UNDEFINED - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: None, # UNDEFINED - 0x0099: None, # UNDEFINED - 0x009a: None, # UNDEFINED - 0x009b: None, # UNDEFINED - 0x009c: None, # UNDEFINED - 0x009d: None, # UNDEFINED - 0x009e: None, # UNDEFINED - 0x009f: None, # UNDEFINED - 0x00a1: 0x0e01, # THAI CHARACTER KO KAI - 0x00a2: 0x0e02, # THAI CHARACTER KHO KHAI - 0x00a3: 0x0e03, # 
THAI CHARACTER KHO KHUAT - 0x00a4: 0x0e04, # THAI CHARACTER KHO KHWAI - 0x00a5: 0x0e05, # THAI CHARACTER KHO KHON - 0x00a6: 0x0e06, # THAI CHARACTER KHO RAKHANG - 0x00a7: 0x0e07, # THAI CHARACTER NGO NGU - 0x00a8: 0x0e08, # THAI CHARACTER CHO CHAN - 0x00a9: 0x0e09, # THAI CHARACTER CHO CHING - 0x00aa: 0x0e0a, # THAI CHARACTER CHO CHANG - 0x00ab: 0x0e0b, # THAI CHARACTER SO SO - 0x00ac: 0x0e0c, # THAI CHARACTER CHO CHOE - 0x00ad: 0x0e0d, # THAI CHARACTER YO YING - 0x00ae: 0x0e0e, # THAI CHARACTER DO CHADA - 0x00af: 0x0e0f, # THAI CHARACTER TO PATAK - 0x00b0: 0x0e10, # THAI CHARACTER THO THAN - 0x00b1: 0x0e11, # THAI CHARACTER THO NANGMONTHO - 0x00b2: 0x0e12, # THAI CHARACTER THO PHUTHAO - 0x00b3: 0x0e13, # THAI CHARACTER NO NEN - 0x00b4: 0x0e14, # THAI CHARACTER DO DEK - 0x00b5: 0x0e15, # THAI CHARACTER TO TAO - 0x00b6: 0x0e16, # THAI CHARACTER THO THUNG - 0x00b7: 0x0e17, # THAI CHARACTER THO THAHAN - 0x00b8: 0x0e18, # THAI CHARACTER THO THONG - 0x00b9: 0x0e19, # THAI CHARACTER NO NU - 0x00ba: 0x0e1a, # THAI CHARACTER BO BAIMAI - 0x00bb: 0x0e1b, # THAI CHARACTER PO PLA - 0x00bc: 0x0e1c, # THAI CHARACTER PHO PHUNG - 0x00bd: 0x0e1d, # THAI CHARACTER FO FA - 0x00be: 0x0e1e, # THAI CHARACTER PHO PHAN - 0x00bf: 0x0e1f, # THAI CHARACTER FO FAN - 0x00c0: 0x0e20, # THAI CHARACTER PHO SAMPHAO - 0x00c1: 0x0e21, # THAI CHARACTER MO MA - 0x00c2: 0x0e22, # THAI CHARACTER YO YAK - 0x00c3: 0x0e23, # THAI CHARACTER RO RUA - 0x00c4: 0x0e24, # THAI CHARACTER RU - 0x00c5: 0x0e25, # THAI CHARACTER LO LING - 0x00c6: 0x0e26, # THAI CHARACTER LU - 0x00c7: 0x0e27, # THAI CHARACTER WO WAEN - 0x00c8: 0x0e28, # THAI CHARACTER SO SALA - 0x00c9: 0x0e29, # THAI CHARACTER SO RUSI - 0x00ca: 0x0e2a, # THAI CHARACTER SO SUA - 0x00cb: 0x0e2b, # THAI CHARACTER HO HIP - 0x00cc: 0x0e2c, # THAI CHARACTER LO CHULA - 0x00cd: 0x0e2d, # THAI CHARACTER O ANG - 0x00ce: 0x0e2e, # THAI CHARACTER HO NOKHUK - 0x00cf: 0x0e2f, # THAI CHARACTER PAIYANNOI - 0x00d0: 0x0e30, # THAI CHARACTER SARA A - 0x00d1: 0x0e31, # 
THAI CHARACTER MAI HAN-AKAT - 0x00d2: 0x0e32, # THAI CHARACTER SARA AA - 0x00d3: 0x0e33, # THAI CHARACTER SARA AM - 0x00d4: 0x0e34, # THAI CHARACTER SARA I - 0x00d5: 0x0e35, # THAI CHARACTER SARA II - 0x00d6: 0x0e36, # THAI CHARACTER SARA UE - 0x00d7: 0x0e37, # THAI CHARACTER SARA UEE - 0x00d8: 0x0e38, # THAI CHARACTER SARA U - 0x00d9: 0x0e39, # THAI CHARACTER SARA UU - 0x00da: 0x0e3a, # THAI CHARACTER PHINTHU - 0x00db: None, # UNDEFINED - 0x00dc: None, # UNDEFINED - 0x00dd: None, # UNDEFINED - 0x00de: None, # UNDEFINED - 0x00df: 0x0e3f, # THAI CURRENCY SYMBOL BAHT - 0x00e0: 0x0e40, # THAI CHARACTER SARA E - 0x00e1: 0x0e41, # THAI CHARACTER SARA AE - 0x00e2: 0x0e42, # THAI CHARACTER SARA O - 0x00e3: 0x0e43, # THAI CHARACTER SARA AI MAIMUAN - 0x00e4: 0x0e44, # THAI CHARACTER SARA AI MAIMALAI - 0x00e5: 0x0e45, # THAI CHARACTER LAKKHANGYAO - 0x00e6: 0x0e46, # THAI CHARACTER MAIYAMOK - 0x00e7: 0x0e47, # THAI CHARACTER MAITAIKHU - 0x00e8: 0x0e48, # THAI CHARACTER MAI EK - 0x00e9: 0x0e49, # THAI CHARACTER MAI THO - 0x00ea: 0x0e4a, # THAI CHARACTER MAI TRI - 0x00eb: 0x0e4b, # THAI CHARACTER MAI CHATTAWA - 0x00ec: 0x0e4c, # THAI CHARACTER THANTHAKHAT - 0x00ed: 0x0e4d, # THAI CHARACTER NIKHAHIT - 0x00ee: 0x0e4e, # THAI CHARACTER YAMAKKAN - 0x00ef: 0x0e4f, # THAI CHARACTER FONGMAN - 0x00f0: 0x0e50, # THAI DIGIT ZERO - 0x00f1: 0x0e51, # THAI DIGIT ONE - 0x00f2: 0x0e52, # THAI DIGIT TWO - 0x00f3: 0x0e53, # THAI DIGIT THREE - 0x00f4: 0x0e54, # THAI DIGIT FOUR - 0x00f5: 0x0e55, # THAI DIGIT FIVE - 0x00f6: 0x0e56, # THAI DIGIT SIX - 0x00f7: 0x0e57, # THAI DIGIT SEVEN - 0x00f8: 0x0e58, # THAI DIGIT EIGHT - 0x00f9: 0x0e59, # THAI DIGIT NINE - 0x00fa: 0x0e5a, # THAI CHARACTER ANGKHANKHU - 0x00fb: 0x0e5b, # THAI CHARACTER KHOMUT - 0x00fc: None, # UNDEFINED - 0x00fd: None, # UNDEFINED - 0x00fe: None, # UNDEFINED - 0x00ff: None, # UNDEFINED + 0x0080: 0x20ac, # EURO SIGN + 0x0081: None, # UNDEFINED + 0x0082: None, # UNDEFINED + 0x0083: None, # UNDEFINED + 0x0084: None, # UNDEFINED + 
0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: None, # UNDEFINED + 0x0087: None, # UNDEFINED + 0x0088: None, # UNDEFINED + 0x0089: None, # UNDEFINED + 0x008a: None, # UNDEFINED + 0x008b: None, # UNDEFINED + 0x008c: None, # UNDEFINED + 0x008d: None, # UNDEFINED + 0x008e: None, # UNDEFINED + 0x008f: None, # UNDEFINED + 0x0090: None, # UNDEFINED + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: None, # UNDEFINED + 0x0099: None, # UNDEFINED + 0x009a: None, # UNDEFINED + 0x009b: None, # UNDEFINED + 0x009c: None, # UNDEFINED + 0x009d: None, # UNDEFINED + 0x009e: None, # UNDEFINED + 0x009f: None, # UNDEFINED + 0x00a1: 0x0e01, # THAI CHARACTER KO KAI + 0x00a2: 0x0e02, # THAI CHARACTER KHO KHAI + 0x00a3: 0x0e03, # THAI CHARACTER KHO KHUAT + 0x00a4: 0x0e04, # THAI CHARACTER KHO KHWAI + 0x00a5: 0x0e05, # THAI CHARACTER KHO KHON + 0x00a6: 0x0e06, # THAI CHARACTER KHO RAKHANG + 0x00a7: 0x0e07, # THAI CHARACTER NGO NGU + 0x00a8: 0x0e08, # THAI CHARACTER CHO CHAN + 0x00a9: 0x0e09, # THAI CHARACTER CHO CHING + 0x00aa: 0x0e0a, # THAI CHARACTER CHO CHANG + 0x00ab: 0x0e0b, # THAI CHARACTER SO SO + 0x00ac: 0x0e0c, # THAI CHARACTER CHO CHOE + 0x00ad: 0x0e0d, # THAI CHARACTER YO YING + 0x00ae: 0x0e0e, # THAI CHARACTER DO CHADA + 0x00af: 0x0e0f, # THAI CHARACTER TO PATAK + 0x00b0: 0x0e10, # THAI CHARACTER THO THAN + 0x00b1: 0x0e11, # THAI CHARACTER THO NANGMONTHO + 0x00b2: 0x0e12, # THAI CHARACTER THO PHUTHAO + 0x00b3: 0x0e13, # THAI CHARACTER NO NEN + 0x00b4: 0x0e14, # THAI CHARACTER DO DEK + 0x00b5: 0x0e15, # THAI CHARACTER TO TAO + 0x00b6: 0x0e16, # THAI CHARACTER THO THUNG + 0x00b7: 0x0e17, # THAI CHARACTER THO THAHAN + 0x00b8: 0x0e18, # THAI CHARACTER THO THONG + 0x00b9: 0x0e19, # THAI CHARACTER NO NU + 0x00ba: 0x0e1a, # THAI CHARACTER BO BAIMAI + 
0x00bb: 0x0e1b, # THAI CHARACTER PO PLA + 0x00bc: 0x0e1c, # THAI CHARACTER PHO PHUNG + 0x00bd: 0x0e1d, # THAI CHARACTER FO FA + 0x00be: 0x0e1e, # THAI CHARACTER PHO PHAN + 0x00bf: 0x0e1f, # THAI CHARACTER FO FAN + 0x00c0: 0x0e20, # THAI CHARACTER PHO SAMPHAO + 0x00c1: 0x0e21, # THAI CHARACTER MO MA + 0x00c2: 0x0e22, # THAI CHARACTER YO YAK + 0x00c3: 0x0e23, # THAI CHARACTER RO RUA + 0x00c4: 0x0e24, # THAI CHARACTER RU + 0x00c5: 0x0e25, # THAI CHARACTER LO LING + 0x00c6: 0x0e26, # THAI CHARACTER LU + 0x00c7: 0x0e27, # THAI CHARACTER WO WAEN + 0x00c8: 0x0e28, # THAI CHARACTER SO SALA + 0x00c9: 0x0e29, # THAI CHARACTER SO RUSI + 0x00ca: 0x0e2a, # THAI CHARACTER SO SUA + 0x00cb: 0x0e2b, # THAI CHARACTER HO HIP + 0x00cc: 0x0e2c, # THAI CHARACTER LO CHULA + 0x00cd: 0x0e2d, # THAI CHARACTER O ANG + 0x00ce: 0x0e2e, # THAI CHARACTER HO NOKHUK + 0x00cf: 0x0e2f, # THAI CHARACTER PAIYANNOI + 0x00d0: 0x0e30, # THAI CHARACTER SARA A + 0x00d1: 0x0e31, # THAI CHARACTER MAI HAN-AKAT + 0x00d2: 0x0e32, # THAI CHARACTER SARA AA + 0x00d3: 0x0e33, # THAI CHARACTER SARA AM + 0x00d4: 0x0e34, # THAI CHARACTER SARA I + 0x00d5: 0x0e35, # THAI CHARACTER SARA II + 0x00d6: 0x0e36, # THAI CHARACTER SARA UE + 0x00d7: 0x0e37, # THAI CHARACTER SARA UEE + 0x00d8: 0x0e38, # THAI CHARACTER SARA U + 0x00d9: 0x0e39, # THAI CHARACTER SARA UU + 0x00da: 0x0e3a, # THAI CHARACTER PHINTHU + 0x00db: None, # UNDEFINED + 0x00dc: None, # UNDEFINED + 0x00dd: None, # UNDEFINED + 0x00de: None, # UNDEFINED + 0x00df: 0x0e3f, # THAI CURRENCY SYMBOL BAHT + 0x00e0: 0x0e40, # THAI CHARACTER SARA E + 0x00e1: 0x0e41, # THAI CHARACTER SARA AE + 0x00e2: 0x0e42, # THAI CHARACTER SARA O + 0x00e3: 0x0e43, # THAI CHARACTER SARA AI MAIMUAN + 0x00e4: 0x0e44, # THAI CHARACTER SARA AI MAIMALAI + 0x00e5: 0x0e45, # THAI CHARACTER LAKKHANGYAO + 0x00e6: 0x0e46, # THAI CHARACTER MAIYAMOK + 0x00e7: 0x0e47, # THAI CHARACTER MAITAIKHU + 0x00e8: 0x0e48, # THAI CHARACTER MAI EK + 0x00e9: 0x0e49, # THAI CHARACTER MAI THO + 0x00ea: 0x0e4a, # 
THAI CHARACTER MAI TRI + 0x00eb: 0x0e4b, # THAI CHARACTER MAI CHATTAWA + 0x00ec: 0x0e4c, # THAI CHARACTER THANTHAKHAT + 0x00ed: 0x0e4d, # THAI CHARACTER NIKHAHIT + 0x00ee: 0x0e4e, # THAI CHARACTER YAMAKKAN + 0x00ef: 0x0e4f, # THAI CHARACTER FONGMAN + 0x00f0: 0x0e50, # THAI DIGIT ZERO + 0x00f1: 0x0e51, # THAI DIGIT ONE + 0x00f2: 0x0e52, # THAI DIGIT TWO + 0x00f3: 0x0e53, # THAI DIGIT THREE + 0x00f4: 0x0e54, # THAI DIGIT FOUR + 0x00f5: 0x0e55, # THAI DIGIT FIVE + 0x00f6: 0x0e56, # THAI DIGIT SIX + 0x00f7: 0x0e57, # THAI DIGIT SEVEN + 0x00f8: 0x0e58, # THAI DIGIT EIGHT + 0x00f9: 0x0e59, # THAI DIGIT NINE + 0x00fa: 0x0e5a, # THAI CHARACTER ANGKHANKHU + 0x00fb: 0x0e5b, # THAI CHARACTER KHOMUT + 0x00fc: None, # UNDEFINED + 0x00fd: None, # UNDEFINED + 0x00fe: None, # UNDEFINED + 0x00ff: None, # UNDEFINED }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> 
UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + 
u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u20ac' # 0x0080 -> EURO SIGN + u'\ufffe' # 0x0081 -> UNDEFINED + u'\ufffe' # 0x0082 -> UNDEFINED + u'\ufffe' # 0x0083 -> UNDEFINED + u'\ufffe' # 0x0084 -> UNDEFINED + u'\u2026' # 0x0085 -> HORIZONTAL ELLIPSIS + u'\ufffe' # 0x0086 -> UNDEFINED + u'\ufffe' # 0x0087 -> UNDEFINED + u'\ufffe' # 0x0088 -> UNDEFINED + u'\ufffe' # 0x0089 -> UNDEFINED + u'\ufffe' # 0x008a -> UNDEFINED + u'\ufffe' # 0x008b -> UNDEFINED + u'\ufffe' # 0x008c -> UNDEFINED + 
u'\ufffe' # 0x008d -> UNDEFINED + u'\ufffe' # 0x008e -> UNDEFINED + u'\ufffe' # 0x008f -> UNDEFINED + u'\ufffe' # 0x0090 -> UNDEFINED + u'\u2018' # 0x0091 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x0092 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x0093 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x0094 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x0095 -> BULLET + u'\u2013' # 0x0096 -> EN DASH + u'\u2014' # 0x0097 -> EM DASH + u'\ufffe' # 0x0098 -> UNDEFINED + u'\ufffe' # 0x0099 -> UNDEFINED + u'\ufffe' # 0x009a -> UNDEFINED + u'\ufffe' # 0x009b -> UNDEFINED + u'\ufffe' # 0x009c -> UNDEFINED + u'\ufffe' # 0x009d -> UNDEFINED + u'\ufffe' # 0x009e -> UNDEFINED + u'\ufffe' # 0x009f -> UNDEFINED + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u0e01' # 0x00a1 -> THAI CHARACTER KO KAI + u'\u0e02' # 0x00a2 -> THAI CHARACTER KHO KHAI + u'\u0e03' # 0x00a3 -> THAI CHARACTER KHO KHUAT + u'\u0e04' # 0x00a4 -> THAI CHARACTER KHO KHWAI + u'\u0e05' # 0x00a5 -> THAI CHARACTER KHO KHON + u'\u0e06' # 0x00a6 -> THAI CHARACTER KHO RAKHANG + u'\u0e07' # 0x00a7 -> THAI CHARACTER NGO NGU + u'\u0e08' # 0x00a8 -> THAI CHARACTER CHO CHAN + u'\u0e09' # 0x00a9 -> THAI CHARACTER CHO CHING + u'\u0e0a' # 0x00aa -> THAI CHARACTER CHO CHANG + u'\u0e0b' # 0x00ab -> THAI CHARACTER SO SO + u'\u0e0c' # 0x00ac -> THAI CHARACTER CHO CHOE + u'\u0e0d' # 0x00ad -> THAI CHARACTER YO YING + u'\u0e0e' # 0x00ae -> THAI CHARACTER DO CHADA + u'\u0e0f' # 0x00af -> THAI CHARACTER TO PATAK + u'\u0e10' # 0x00b0 -> THAI CHARACTER THO THAN + u'\u0e11' # 0x00b1 -> THAI CHARACTER THO NANGMONTHO + u'\u0e12' # 0x00b2 -> THAI CHARACTER THO PHUTHAO + u'\u0e13' # 0x00b3 -> THAI CHARACTER NO NEN + u'\u0e14' # 0x00b4 -> THAI CHARACTER DO DEK + u'\u0e15' # 0x00b5 -> THAI CHARACTER TO TAO + u'\u0e16' # 0x00b6 -> THAI CHARACTER THO THUNG + u'\u0e17' # 0x00b7 -> THAI CHARACTER THO THAHAN + u'\u0e18' # 0x00b8 -> THAI CHARACTER THO THONG + u'\u0e19' # 0x00b9 -> THAI CHARACTER NO NU + u'\u0e1a' # 0x00ba -> THAI CHARACTER BO BAIMAI + 
u'\u0e1b' # 0x00bb -> THAI CHARACTER PO PLA + u'\u0e1c' # 0x00bc -> THAI CHARACTER PHO PHUNG + u'\u0e1d' # 0x00bd -> THAI CHARACTER FO FA + u'\u0e1e' # 0x00be -> THAI CHARACTER PHO PHAN + u'\u0e1f' # 0x00bf -> THAI CHARACTER FO FAN + u'\u0e20' # 0x00c0 -> THAI CHARACTER PHO SAMPHAO + u'\u0e21' # 0x00c1 -> THAI CHARACTER MO MA + u'\u0e22' # 0x00c2 -> THAI CHARACTER YO YAK + u'\u0e23' # 0x00c3 -> THAI CHARACTER RO RUA + u'\u0e24' # 0x00c4 -> THAI CHARACTER RU + u'\u0e25' # 0x00c5 -> THAI CHARACTER LO LING + u'\u0e26' # 0x00c6 -> THAI CHARACTER LU + u'\u0e27' # 0x00c7 -> THAI CHARACTER WO WAEN + u'\u0e28' # 0x00c8 -> THAI CHARACTER SO SALA + u'\u0e29' # 0x00c9 -> THAI CHARACTER SO RUSI + u'\u0e2a' # 0x00ca -> THAI CHARACTER SO SUA + u'\u0e2b' # 0x00cb -> THAI CHARACTER HO HIP + u'\u0e2c' # 0x00cc -> THAI CHARACTER LO CHULA + u'\u0e2d' # 0x00cd -> THAI CHARACTER O ANG + u'\u0e2e' # 0x00ce -> THAI CHARACTER HO NOKHUK + u'\u0e2f' # 0x00cf -> THAI CHARACTER PAIYANNOI + u'\u0e30' # 0x00d0 -> THAI CHARACTER SARA A + u'\u0e31' # 0x00d1 -> THAI CHARACTER MAI HAN-AKAT + u'\u0e32' # 0x00d2 -> THAI CHARACTER SARA AA + u'\u0e33' # 0x00d3 -> THAI CHARACTER SARA AM + u'\u0e34' # 0x00d4 -> THAI CHARACTER SARA I + u'\u0e35' # 0x00d5 -> THAI CHARACTER SARA II + u'\u0e36' # 0x00d6 -> THAI CHARACTER SARA UE + u'\u0e37' # 0x00d7 -> THAI CHARACTER SARA UEE + u'\u0e38' # 0x00d8 -> THAI CHARACTER SARA U + u'\u0e39' # 0x00d9 -> THAI CHARACTER SARA UU + u'\u0e3a' # 0x00da -> THAI CHARACTER PHINTHU + u'\ufffe' # 0x00db -> UNDEFINED + u'\ufffe' # 0x00dc -> UNDEFINED + u'\ufffe' # 0x00dd -> UNDEFINED + u'\ufffe' # 0x00de -> UNDEFINED + u'\u0e3f' # 0x00df -> THAI CURRENCY SYMBOL BAHT + u'\u0e40' # 0x00e0 -> THAI CHARACTER SARA E + u'\u0e41' # 0x00e1 -> THAI CHARACTER SARA AE + u'\u0e42' # 0x00e2 -> THAI CHARACTER SARA O + u'\u0e43' # 0x00e3 -> THAI CHARACTER SARA AI MAIMUAN + u'\u0e44' # 0x00e4 -> THAI CHARACTER SARA AI MAIMALAI + u'\u0e45' # 0x00e5 -> THAI CHARACTER LAKKHANGYAO + u'\u0e46' # 
0x00e6 -> THAI CHARACTER MAIYAMOK + u'\u0e47' # 0x00e7 -> THAI CHARACTER MAITAIKHU + u'\u0e48' # 0x00e8 -> THAI CHARACTER MAI EK + u'\u0e49' # 0x00e9 -> THAI CHARACTER MAI THO + u'\u0e4a' # 0x00ea -> THAI CHARACTER MAI TRI + u'\u0e4b' # 0x00eb -> THAI CHARACTER MAI CHATTAWA + u'\u0e4c' # 0x00ec -> THAI CHARACTER THANTHAKHAT + u'\u0e4d' # 0x00ed -> THAI CHARACTER NIKHAHIT + u'\u0e4e' # 0x00ee -> THAI CHARACTER YAMAKKAN + u'\u0e4f' # 0x00ef -> THAI CHARACTER FONGMAN + u'\u0e50' # 0x00f0 -> THAI DIGIT ZERO + u'\u0e51' # 0x00f1 -> THAI DIGIT ONE + u'\u0e52' # 0x00f2 -> THAI DIGIT TWO + u'\u0e53' # 0x00f3 -> THAI DIGIT THREE + u'\u0e54' # 0x00f4 -> THAI DIGIT FOUR + u'\u0e55' # 0x00f5 -> THAI DIGIT FIVE + u'\u0e56' # 0x00f6 -> THAI DIGIT SIX + u'\u0e57' # 0x00f7 -> THAI DIGIT SEVEN + u'\u0e58' # 0x00f8 -> THAI DIGIT EIGHT + u'\u0e59' # 0x00f9 -> THAI DIGIT NINE + u'\u0e5a' # 0x00fa -> THAI CHARACTER ANGKHANKHU + u'\u0e5b' # 0x00fb -> THAI CHARACTER KHOMUT + u'\ufffe' # 0x00fc -> UNDEFINED + u'\ufffe' # 0x00fd -> UNDEFINED + u'\ufffe' # 0x00fe -> UNDEFINED + u'\ufffe' # 0x00ff -> UNDEFINED +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 
0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 
0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x0e01: 0x00a1, # THAI CHARACTER KO KAI + 0x0e02: 
0x00a2, # THAI CHARACTER KHO KHAI + 0x0e03: 0x00a3, # THAI CHARACTER KHO KHUAT + 0x0e04: 0x00a4, # THAI CHARACTER KHO KHWAI + 0x0e05: 0x00a5, # THAI CHARACTER KHO KHON + 0x0e06: 0x00a6, # THAI CHARACTER KHO RAKHANG + 0x0e07: 0x00a7, # THAI CHARACTER NGO NGU + 0x0e08: 0x00a8, # THAI CHARACTER CHO CHAN + 0x0e09: 0x00a9, # THAI CHARACTER CHO CHING + 0x0e0a: 0x00aa, # THAI CHARACTER CHO CHANG + 0x0e0b: 0x00ab, # THAI CHARACTER SO SO + 0x0e0c: 0x00ac, # THAI CHARACTER CHO CHOE + 0x0e0d: 0x00ad, # THAI CHARACTER YO YING + 0x0e0e: 0x00ae, # THAI CHARACTER DO CHADA + 0x0e0f: 0x00af, # THAI CHARACTER TO PATAK + 0x0e10: 0x00b0, # THAI CHARACTER THO THAN + 0x0e11: 0x00b1, # THAI CHARACTER THO NANGMONTHO + 0x0e12: 0x00b2, # THAI CHARACTER THO PHUTHAO + 0x0e13: 0x00b3, # THAI CHARACTER NO NEN + 0x0e14: 0x00b4, # THAI CHARACTER DO DEK + 0x0e15: 0x00b5, # THAI CHARACTER TO TAO + 0x0e16: 0x00b6, # THAI CHARACTER THO THUNG + 0x0e17: 0x00b7, # THAI CHARACTER THO THAHAN + 0x0e18: 0x00b8, # THAI CHARACTER THO THONG + 0x0e19: 0x00b9, # THAI CHARACTER NO NU + 0x0e1a: 0x00ba, # THAI CHARACTER BO BAIMAI + 0x0e1b: 0x00bb, # THAI CHARACTER PO PLA + 0x0e1c: 0x00bc, # THAI CHARACTER PHO PHUNG + 0x0e1d: 0x00bd, # THAI CHARACTER FO FA + 0x0e1e: 0x00be, # THAI CHARACTER PHO PHAN + 0x0e1f: 0x00bf, # THAI CHARACTER FO FAN + 0x0e20: 0x00c0, # THAI CHARACTER PHO SAMPHAO + 0x0e21: 0x00c1, # THAI CHARACTER MO MA + 0x0e22: 0x00c2, # THAI CHARACTER YO YAK + 0x0e23: 0x00c3, # THAI CHARACTER RO RUA + 0x0e24: 0x00c4, # THAI CHARACTER RU + 0x0e25: 0x00c5, # THAI CHARACTER LO LING + 0x0e26: 0x00c6, # THAI CHARACTER LU + 0x0e27: 0x00c7, # THAI CHARACTER WO WAEN + 0x0e28: 0x00c8, # THAI CHARACTER SO SALA + 0x0e29: 0x00c9, # THAI CHARACTER SO RUSI + 0x0e2a: 0x00ca, # THAI CHARACTER SO SUA + 0x0e2b: 0x00cb, # THAI CHARACTER HO HIP + 0x0e2c: 0x00cc, # THAI CHARACTER LO CHULA + 0x0e2d: 0x00cd, # THAI CHARACTER O ANG + 0x0e2e: 0x00ce, # THAI CHARACTER HO NOKHUK + 0x0e2f: 0x00cf, # THAI CHARACTER PAIYANNOI + 0x0e30: 
0x00d0, # THAI CHARACTER SARA A + 0x0e31: 0x00d1, # THAI CHARACTER MAI HAN-AKAT + 0x0e32: 0x00d2, # THAI CHARACTER SARA AA + 0x0e33: 0x00d3, # THAI CHARACTER SARA AM + 0x0e34: 0x00d4, # THAI CHARACTER SARA I + 0x0e35: 0x00d5, # THAI CHARACTER SARA II + 0x0e36: 0x00d6, # THAI CHARACTER SARA UE + 0x0e37: 0x00d7, # THAI CHARACTER SARA UEE + 0x0e38: 0x00d8, # THAI CHARACTER SARA U + 0x0e39: 0x00d9, # THAI CHARACTER SARA UU + 0x0e3a: 0x00da, # THAI CHARACTER PHINTHU + 0x0e3f: 0x00df, # THAI CURRENCY SYMBOL BAHT + 0x0e40: 0x00e0, # THAI CHARACTER SARA E + 0x0e41: 0x00e1, # THAI CHARACTER SARA AE + 0x0e42: 0x00e2, # THAI CHARACTER SARA O + 0x0e43: 0x00e3, # THAI CHARACTER SARA AI MAIMUAN + 0x0e44: 0x00e4, # THAI CHARACTER SARA AI MAIMALAI + 0x0e45: 0x00e5, # THAI CHARACTER LAKKHANGYAO + 0x0e46: 0x00e6, # THAI CHARACTER MAIYAMOK + 0x0e47: 0x00e7, # THAI CHARACTER MAITAIKHU + 0x0e48: 0x00e8, # THAI CHARACTER MAI EK + 0x0e49: 0x00e9, # THAI CHARACTER MAI THO + 0x0e4a: 0x00ea, # THAI CHARACTER MAI TRI + 0x0e4b: 0x00eb, # THAI CHARACTER MAI CHATTAWA + 0x0e4c: 0x00ec, # THAI CHARACTER THANTHAKHAT + 0x0e4d: 0x00ed, # THAI CHARACTER NIKHAHIT + 0x0e4e: 0x00ee, # THAI CHARACTER YAMAKKAN + 0x0e4f: 0x00ef, # THAI CHARACTER FONGMAN + 0x0e50: 0x00f0, # THAI DIGIT ZERO + 0x0e51: 0x00f1, # THAI DIGIT ONE + 0x0e52: 0x00f2, # THAI DIGIT TWO + 0x0e53: 0x00f3, # THAI DIGIT THREE + 0x0e54: 0x00f4, # THAI DIGIT FOUR + 0x0e55: 0x00f5, # THAI DIGIT FIVE + 0x0e56: 0x00f6, # THAI DIGIT SIX + 0x0e57: 0x00f7, # THAI DIGIT SEVEN + 0x0e58: 0x00f8, # THAI DIGIT EIGHT + 0x0e59: 0x00f9, # THAI DIGIT NINE + 0x0e5a: 0x00fa, # THAI CHARACTER ANGKHANKHU + 0x0e5b: 0x00fb, # THAI CHARACTER KHOMUT + 0x2013: 0x0096, # EN DASH + 0x2014: 0x0097, # EM DASH + 0x2018: 0x0091, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x0092, # RIGHT SINGLE QUOTATION MARK + 0x201c: 0x0093, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x0094, # RIGHT DOUBLE QUOTATION MARK + 0x2022: 0x0095, # BULLET + 0x2026: 0x0085, # HORIZONTAL ELLIPSIS + 
0x20ac: 0x0080, # EURO SIGN +}
\ No newline at end of file diff --git a/Lib/encodings/iso8859_1.py b/Lib/encodings/iso8859_1.py index 9fa2fcc..bc4c0b8 100644 --- a/Lib/encodings/iso8859_1.py +++ b/Lib/encodings/iso8859_1.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-1.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-1.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -39,6 +34,524 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d 
-> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> <control> + u'\x81' # 0x0081 -> <control> + u'\x82' # 0x0082 -> <control> + u'\x83' # 0x0083 -> <control> + u'\x84' # 0x0084 -> <control> + u'\x85' # 0x0085 -> <control> + u'\x86' # 0x0086 -> <control> + u'\x87' # 0x0087 -> <control> + u'\x88' # 0x0088 -> <control> + u'\x89' # 0x0089 -> <control> + u'\x8a' # 0x008a -> <control> + u'\x8b' # 0x008b -> <control> + u'\x8c' # 0x008c -> <control> + u'\x8d' # 0x008d -> <control> + u'\x8e' # 0x008e -> <control> + u'\x8f' # 0x008f -> <control> + u'\x90' # 0x0090 -> <control> + u'\x91' # 0x0091 -> <control> + u'\x92' # 0x0092 -> <control> + u'\x93' # 0x0093 -> <control> + u'\x94' # 0x0094 -> <control> + u'\x95' # 0x0095 -> <control> + u'\x96' # 0x0096 -> <control> + u'\x97' # 0x0097 -> <control> + u'\x98' # 0x0098 -> <control> + u'\x99' # 0x0099 -> <control> + u'\x9a' # 0x009a -> <control> + u'\x9b' # 0x009b -> <control> + u'\x9c' # 0x009c -> <control> + u'\x9d' # 0x009d -> <control> + u'\x9e' # 0x009e -> <control> + u'\x9f' # 0x009f -> <control> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\xa1' # 0x00a1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\xa5' # 0x00a5 -> YEN SIGN + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\xaa' # 0x00aa -> FEMININE ORDINAL INDICATOR 
+ u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\xaf' # 0x00af -> MACRON + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\xb8' # 0x00b8 -> CEDILLA + u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE + u'\xba' # 0x00ba -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0x00bc -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\xbe' # 0x00be -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0x00bf -> INVERTED QUESTION MARK + u'\xc0' # 0x00c0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0x00c3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x00c5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0x00c6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0x00c7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0x00c8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x00ca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0x00cc -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00cf -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xd0' # 0x00d0 -> LATIN CAPITAL LETTER ETH (Icelandic) + u'\xd1' # 0x00d1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0x00d2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O 
WITH ACUTE + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0x00d5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\xd8' # 0x00d8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0x00d9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00db -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0x00dd -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0x00de -> LATIN CAPITAL LETTER THORN (Icelandic) + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S (German) + u'\xe0' # 0x00e0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0x00e3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0x00e5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0x00e6 -> LATIN SMALL LETTER AE + u'\xe7' # 0x00e7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0x00e8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x00ea -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x00ef -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf0' # 0x00f0 -> LATIN SMALL LETTER ETH (Icelandic) + u'\xf1' # 0x00f1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0x00f2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0x00f5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x00f7 -> 
DIVISION SIGN + u'\xf8' # 0x00f8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0x00f9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0x00fb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0x00fd -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0x00fe -> LATIN SMALL LETTER THORN (Icelandic) + u'\xff' # 0x00ff -> LATIN SMALL LETTER Y WITH DIAERESIS +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 
0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 
0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # <control> + 0x0081: 0x0081, # <control> + 0x0082: 0x0082, # <control> + 0x0083: 0x0083, # <control> + 0x0084: 0x0084, # <control> + 0x0085: 0x0085, # <control> + 0x0086: 0x0086, # <control> + 0x0087: 0x0087, # <control> + 0x0088: 0x0088, # <control> + 0x0089: 0x0089, # <control> + 0x008a: 0x008a, # <control> + 0x008b: 0x008b, # <control> + 0x008c: 0x008c, # <control> + 0x008d: 0x008d, # <control> + 0x008e: 0x008e, # <control> + 0x008f: 0x008f, # <control> + 0x0090: 0x0090, # <control> + 0x0091: 0x0091, # <control> + 0x0092: 0x0092, # <control> + 0x0093: 0x0093, # <control> + 0x0094: 0x0094, # <control> + 0x0095: 0x0095, # <control> + 0x0096: 0x0096, # <control> + 0x0097: 0x0097, # 
<control> + 0x0098: 0x0098, # <control> + 0x0099: 0x0099, # <control> + 0x009a: 0x009a, # <control> + 0x009b: 0x009b, # <control> + 0x009c: 0x009c, # <control> + 0x009d: 0x009d, # <control> + 0x009e: 0x009e, # <control> + 0x009f: 0x009f, # <control> + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a1: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a5: 0x00a5, # YEN SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00aa: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00af: 0x00af, # MACRON + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b8: 0x00b8, # CEDILLA + 0x00b9: 0x00b9, # SUPERSCRIPT ONE + 0x00ba: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00be: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00bf: 0x00bf, # INVERTED QUESTION MARK + 0x00c0: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00c7: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 
0x00ca: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d0: 0x00d0, # LATIN CAPITAL LETTER ETH (Icelandic) + 0x00d1: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00d8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00de: 0x00de, # LATIN CAPITAL LETTER THORN (Icelandic) + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S (German) + 0x00e0: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00e6, # LATIN SMALL LETTER AE + 0x00e7: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL 
LETTER I WITH CIRCUMFLEX + 0x00ef: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f0: 0x00f0, # LATIN SMALL LETTER ETH (Icelandic) + 0x00f1: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00f8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0x00fe, # LATIN SMALL LETTER THORN (Icelandic) + 0x00ff: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS +}
\ No newline at end of file diff --git a/Lib/encodings/iso8859_10.py b/Lib/encodings/iso8859_10.py index df2565a..d88b498 100644 --- a/Lib/encodings/iso8859_10.py +++ b/Lib/encodings/iso8859_10.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-10.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-10.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,54 +32,572 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00a2: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON - 0x00a3: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x00a4: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON - 0x00a5: 0x0128, # LATIN CAPITAL LETTER I WITH TILDE - 0x00a6: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x00a8: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x00a9: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE - 0x00aa: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00ab: 0x0166, # LATIN CAPITAL LETTER T WITH STROKE - 0x00ac: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00ae: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON - 0x00af: 0x014a, # LATIN CAPITAL LETTER ENG - 0x00b1: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00b2: 0x0113, # LATIN SMALL LETTER E WITH MACRON - 0x00b3: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA - 0x00b4: 0x012b, # LATIN SMALL LETTER I WITH MACRON - 0x00b5: 0x0129, # LATIN SMALL LETTER I WITH TILDE - 0x00b6: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA - 0x00b8: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA - 
0x00b9: 0x0111, # LATIN SMALL LETTER D WITH STROKE - 0x00ba: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00bb: 0x0167, # LATIN SMALL LETTER T WITH STROKE - 0x00bc: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00bd: 0x2015, # HORIZONTAL BAR - 0x00be: 0x016b, # LATIN SMALL LETTER U WITH MACRON - 0x00bf: 0x014b, # LATIN SMALL LETTER ENG - 0x00c0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON - 0x00c7: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK - 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00cc: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x00d1: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x00d2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON - 0x00d7: 0x0168, # LATIN CAPITAL LETTER U WITH TILDE - 0x00d9: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK - 0x00e0: 0x0101, # LATIN SMALL LETTER A WITH MACRON - 0x00e7: 0x012f, # LATIN SMALL LETTER I WITH OGONEK - 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00ec: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x00f1: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA - 0x00f2: 0x014d, # LATIN SMALL LETTER O WITH MACRON - 0x00f7: 0x0169, # LATIN SMALL LETTER U WITH TILDE - 0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK - 0x00ff: 0x0138, # LATIN SMALL LETTER KRA + 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00a2: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON + 0x00a3: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x00a4: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON + 0x00a5: 0x0128, # LATIN CAPITAL LETTER I WITH TILDE + 0x00a6: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x00a8: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x00a9: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE + 0x00aa: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00ab: 0x0166, # LATIN CAPITAL LETTER T WITH STROKE + 0x00ac: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00ae: 0x016a, # LATIN CAPITAL 
LETTER U WITH MACRON + 0x00af: 0x014a, # LATIN CAPITAL LETTER ENG + 0x00b1: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00b2: 0x0113, # LATIN SMALL LETTER E WITH MACRON + 0x00b3: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA + 0x00b4: 0x012b, # LATIN SMALL LETTER I WITH MACRON + 0x00b5: 0x0129, # LATIN SMALL LETTER I WITH TILDE + 0x00b6: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA + 0x00b8: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA + 0x00b9: 0x0111, # LATIN SMALL LETTER D WITH STROKE + 0x00ba: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00bb: 0x0167, # LATIN SMALL LETTER T WITH STROKE + 0x00bc: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00bd: 0x2015, # HORIZONTAL BAR + 0x00be: 0x016b, # LATIN SMALL LETTER U WITH MACRON + 0x00bf: 0x014b, # LATIN SMALL LETTER ENG + 0x00c0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON + 0x00c7: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK + 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00cc: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x00d1: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x00d2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON + 0x00d7: 0x0168, # LATIN CAPITAL LETTER U WITH TILDE + 0x00d9: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK + 0x00e0: 0x0101, # LATIN SMALL LETTER A WITH MACRON + 0x00e7: 0x012f, # LATIN SMALL LETTER I WITH OGONEK + 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00ec: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x00f1: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA + 0x00f2: 0x014d, # LATIN SMALL LETTER O WITH MACRON + 0x00f7: 0x0169, # LATIN SMALL LETTER U WITH TILDE + 0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK + 0x00ff: 0x0138, # LATIN SMALL LETTER KRA }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 
-> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> <control> + u'\x81' # 0x0081 -> <control> + u'\x82' # 0x0082 -> <control> + u'\x83' # 0x0083 -> <control> + u'\x84' # 0x0084 -> <control> + u'\x85' # 0x0085 -> <control> + u'\x86' # 0x0086 -> <control> + u'\x87' # 0x0087 -> <control> + u'\x88' # 0x0088 -> <control> + u'\x89' # 0x0089 -> <control> + u'\x8a' # 0x008a -> <control> + u'\x8b' # 0x008b -> <control> + u'\x8c' # 0x008c -> <control> + u'\x8d' # 0x008d -> <control> + u'\x8e' # 0x008e -> <control> + u'\x8f' # 0x008f -> <control> + u'\x90' # 0x0090 -> <control> + u'\x91' # 0x0091 -> <control> + u'\x92' # 0x0092 -> <control> + u'\x93' # 0x0093 -> <control> + u'\x94' # 0x0094 -> <control> + u'\x95' # 0x0095 -> <control> + u'\x96' # 0x0096 -> <control> + u'\x97' # 0x0097 -> <control> + u'\x98' # 0x0098 -> <control> + u'\x99' # 0x0099 -> <control> + u'\x9a' # 0x009a -> <control> + u'\x9b' # 0x009b -> <control> + u'\x9c' # 0x009c -> <control> + u'\x9d' # 0x009d -> <control> + u'\x9e' # 0x009e -> <control> + u'\x9f' # 0x009f -> <control> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u0104' # 0x00a1 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u0112' # 0x00a2 -> LATIN CAPITAL LETTER E WITH MACRON + u'\u0122' # 0x00a3 -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\u012a' # 0x00a4 -> LATIN CAPITAL LETTER I WITH MACRON + u'\u0128' # 0x00a5 -> LATIN CAPITAL LETTER I WITH TILDE + u'\u0136' # 0x00a6 -> LATIN CAPITAL LETTER K WITH CEDILLA + u'\xa7' 
# 0x00a7 -> SECTION SIGN + u'\u013b' # 0x00a8 -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\u0110' # 0x00a9 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u0160' # 0x00aa -> LATIN CAPITAL LETTER S WITH CARON + u'\u0166' # 0x00ab -> LATIN CAPITAL LETTER T WITH STROKE + u'\u017d' # 0x00ac -> LATIN CAPITAL LETTER Z WITH CARON + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\u016a' # 0x00ae -> LATIN CAPITAL LETTER U WITH MACRON + u'\u014a' # 0x00af -> LATIN CAPITAL LETTER ENG + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\u0105' # 0x00b1 -> LATIN SMALL LETTER A WITH OGONEK + u'\u0113' # 0x00b2 -> LATIN SMALL LETTER E WITH MACRON + u'\u0123' # 0x00b3 -> LATIN SMALL LETTER G WITH CEDILLA + u'\u012b' # 0x00b4 -> LATIN SMALL LETTER I WITH MACRON + u'\u0129' # 0x00b5 -> LATIN SMALL LETTER I WITH TILDE + u'\u0137' # 0x00b6 -> LATIN SMALL LETTER K WITH CEDILLA + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\u013c' # 0x00b8 -> LATIN SMALL LETTER L WITH CEDILLA + u'\u0111' # 0x00b9 -> LATIN SMALL LETTER D WITH STROKE + u'\u0161' # 0x00ba -> LATIN SMALL LETTER S WITH CARON + u'\u0167' # 0x00bb -> LATIN SMALL LETTER T WITH STROKE + u'\u017e' # 0x00bc -> LATIN SMALL LETTER Z WITH CARON + u'\u2015' # 0x00bd -> HORIZONTAL BAR + u'\u016b' # 0x00be -> LATIN SMALL LETTER U WITH MACRON + u'\u014b' # 0x00bf -> LATIN SMALL LETTER ENG + u'\u0100' # 0x00c0 -> LATIN CAPITAL LETTER A WITH MACRON + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0x00c3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x00c5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0x00c6 -> LATIN CAPITAL LETTER AE + u'\u012e' # 0x00c7 -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u010c' # 0x00c8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0118' # 0x00ca -> LATIN CAPITAL LETTER E WITH OGONEK + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + 
u'\u0116' # 0x00cc -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00cf -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xd0' # 0x00d0 -> LATIN CAPITAL LETTER ETH (Icelandic) + u'\u0145' # 0x00d1 -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\u014c' # 0x00d2 -> LATIN CAPITAL LETTER O WITH MACRON + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0x00d5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\u0168' # 0x00d7 -> LATIN CAPITAL LETTER U WITH TILDE + u'\xd8' # 0x00d8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\u0172' # 0x00d9 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00db -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0x00dd -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0x00de -> LATIN CAPITAL LETTER THORN (Icelandic) + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S (German) + u'\u0101' # 0x00e0 -> LATIN SMALL LETTER A WITH MACRON + u'\xe1' # 0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0x00e3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0x00e5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0x00e6 -> LATIN SMALL LETTER AE + u'\u012f' # 0x00e7 -> LATIN SMALL LETTER I WITH OGONEK + u'\u010d' # 0x00e8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u0119' # 0x00ea -> LATIN SMALL LETTER E WITH OGONEK + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u0117' # 0x00ec -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I 
WITH CIRCUMFLEX + u'\xef' # 0x00ef -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf0' # 0x00f0 -> LATIN SMALL LETTER ETH (Icelandic) + u'\u0146' # 0x00f1 -> LATIN SMALL LETTER N WITH CEDILLA + u'\u014d' # 0x00f2 -> LATIN SMALL LETTER O WITH MACRON + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0x00f5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\u0169' # 0x00f7 -> LATIN SMALL LETTER U WITH TILDE + u'\xf8' # 0x00f8 -> LATIN SMALL LETTER O WITH STROKE + u'\u0173' # 0x00f9 -> LATIN SMALL LETTER U WITH OGONEK + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0x00fb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0x00fd -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0x00fe -> LATIN SMALL LETTER THORN (Icelandic) + u'\u0138' # 0x00ff -> LATIN SMALL LETTER KRA +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 
0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 
0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # <control> + 0x0081: 0x0081, # <control> + 0x0082: 0x0082, # <control> + 0x0083: 0x0083, # <control> + 0x0084: 0x0084, # <control> + 0x0085: 0x0085, # <control> + 0x0086: 0x0086, # <control> + 
0x0087: 0x0087, # <control> + 0x0088: 0x0088, # <control> + 0x0089: 0x0089, # <control> + 0x008a: 0x008a, # <control> + 0x008b: 0x008b, # <control> + 0x008c: 0x008c, # <control> + 0x008d: 0x008d, # <control> + 0x008e: 0x008e, # <control> + 0x008f: 0x008f, # <control> + 0x0090: 0x0090, # <control> + 0x0091: 0x0091, # <control> + 0x0092: 0x0092, # <control> + 0x0093: 0x0093, # <control> + 0x0094: 0x0094, # <control> + 0x0095: 0x0095, # <control> + 0x0096: 0x0096, # <control> + 0x0097: 0x0097, # <control> + 0x0098: 0x0098, # <control> + 0x0099: 0x0099, # <control> + 0x009a: 0x009a, # <control> + 0x009b: 0x009b, # <control> + 0x009c: 0x009c, # <control> + 0x009d: 0x009d, # <control> + 0x009e: 0x009e, # <control> + 0x009f: 0x009f, # <control> + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a7: 0x00a7, # SECTION SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d0: 0x00d0, # LATIN CAPITAL LETTER ETH (Icelandic) + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x00dc, # LATIN 
CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00de: 0x00de, # LATIN CAPITAL LETTER THORN (Icelandic) + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S (German) + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00e6, # LATIN SMALL LETTER AE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f0: 0x00f0, # LATIN SMALL LETTER ETH (Icelandic) + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00fa: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0x00fe, # LATIN SMALL LETTER THORN (Icelandic) + 0x0100: 0x00c0, # LATIN CAPITAL LETTER A WITH MACRON + 0x0101: 0x00e0, # LATIN SMALL LETTER A WITH MACRON + 0x0104: 0x00a1, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0x00b1, # LATIN SMALL LETTER A WITH OGONEK + 0x010c: 0x00c8, # LATIN CAPITAL LETTER C WITH CARON + 0x010d: 0x00e8, # LATIN SMALL LETTER C WITH CARON + 0x0110: 0x00a9, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0x00b9, # LATIN SMALL LETTER D WITH STROKE + 0x0112: 0x00a2, # LATIN CAPITAL LETTER E WITH MACRON + 0x0113: 0x00b2, # LATIN SMALL LETTER E WITH MACRON + 0x0116: 0x00cc, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x0117: 
0x00ec, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x0118: 0x00ca, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0x00ea, # LATIN SMALL LETTER E WITH OGONEK + 0x0122: 0x00a3, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0123: 0x00b3, # LATIN SMALL LETTER G WITH CEDILLA + 0x0128: 0x00a5, # LATIN CAPITAL LETTER I WITH TILDE + 0x0129: 0x00b5, # LATIN SMALL LETTER I WITH TILDE + 0x012a: 0x00a4, # LATIN CAPITAL LETTER I WITH MACRON + 0x012b: 0x00b4, # LATIN SMALL LETTER I WITH MACRON + 0x012e: 0x00c7, # LATIN CAPITAL LETTER I WITH OGONEK + 0x012f: 0x00e7, # LATIN SMALL LETTER I WITH OGONEK + 0x0136: 0x00a6, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x0137: 0x00b6, # LATIN SMALL LETTER K WITH CEDILLA + 0x0138: 0x00ff, # LATIN SMALL LETTER KRA + 0x013b: 0x00a8, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x013c: 0x00b8, # LATIN SMALL LETTER L WITH CEDILLA + 0x0145: 0x00d1, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x0146: 0x00f1, # LATIN SMALL LETTER N WITH CEDILLA + 0x014a: 0x00af, # LATIN CAPITAL LETTER ENG + 0x014b: 0x00bf, # LATIN SMALL LETTER ENG + 0x014c: 0x00d2, # LATIN CAPITAL LETTER O WITH MACRON + 0x014d: 0x00f2, # LATIN SMALL LETTER O WITH MACRON + 0x0160: 0x00aa, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x00ba, # LATIN SMALL LETTER S WITH CARON + 0x0166: 0x00ab, # LATIN CAPITAL LETTER T WITH STROKE + 0x0167: 0x00bb, # LATIN SMALL LETTER T WITH STROKE + 0x0168: 0x00d7, # LATIN CAPITAL LETTER U WITH TILDE + 0x0169: 0x00f7, # LATIN SMALL LETTER U WITH TILDE + 0x016a: 0x00ae, # LATIN CAPITAL LETTER U WITH MACRON + 0x016b: 0x00be, # LATIN SMALL LETTER U WITH MACRON + 0x0172: 0x00d9, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0x00f9, # LATIN SMALL LETTER U WITH OGONEK + 0x017d: 0x00ac, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x00bc, # LATIN SMALL LETTER Z WITH CARON + 0x2015: 0x00bd, # HORIZONTAL BAR +}
\ No newline at end of file diff --git a/Lib/encodings/iso8859_11.py b/Lib/encodings/iso8859_11.py index dfe46b1..4640168 100644 --- a/Lib/encodings/iso8859_11.py +++ b/Lib/encodings/iso8859_11.py @@ -1,7 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-11.TXT' with gencodec.py. - - Generated from mapping found in - ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-11.TXT +""" Python Character Mapping Codec generated from 'ISO8859/8859-11.TXT' with gencodec.py. """#" @@ -17,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -35,103 +32,613 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x0e01, # THAI CHARACTER KO KAI - 0x00a2: 0x0e02, # THAI CHARACTER KHO KHAI - 0x00a3: 0x0e03, # THAI CHARACTER KHO KHUAT - 0x00a4: 0x0e04, # THAI CHARACTER KHO KHWAI - 0x00a5: 0x0e05, # THAI CHARACTER KHO KHON - 0x00a6: 0x0e06, # THAI CHARACTER KHO RAKHANG - 0x00a7: 0x0e07, # THAI CHARACTER NGO NGU - 0x00a8: 0x0e08, # THAI CHARACTER CHO CHAN - 0x00a9: 0x0e09, # THAI CHARACTER CHO CHING - 0x00aa: 0x0e0a, # THAI CHARACTER CHO CHANG - 0x00ab: 0x0e0b, # THAI CHARACTER SO SO - 0x00ac: 0x0e0c, # THAI CHARACTER CHO CHOE - 0x00ad: 0x0e0d, # THAI CHARACTER YO YING - 0x00ae: 0x0e0e, # THAI CHARACTER DO CHADA - 0x00af: 0x0e0f, # THAI CHARACTER TO PATAK - 0x00b0: 0x0e10, # THAI CHARACTER THO THAN - 0x00b1: 0x0e11, # THAI CHARACTER THO NANGMONTHO - 0x00b2: 0x0e12, # THAI CHARACTER THO PHUTHAO - 0x00b3: 0x0e13, # THAI CHARACTER NO NEN - 0x00b4: 0x0e14, # THAI CHARACTER DO DEK - 0x00b5: 0x0e15, # THAI CHARACTER TO TAO - 0x00b6: 0x0e16, # THAI CHARACTER THO THUNG - 0x00b7: 0x0e17, # THAI CHARACTER THO THAHAN - 0x00b8: 0x0e18, # THAI CHARACTER THO THONG - 0x00b9: 0x0e19, # THAI CHARACTER NO NU - 0x00ba: 0x0e1a, # THAI 
CHARACTER BO BAIMAI - 0x00bb: 0x0e1b, # THAI CHARACTER PO PLA - 0x00bc: 0x0e1c, # THAI CHARACTER PHO PHUNG - 0x00bd: 0x0e1d, # THAI CHARACTER FO FA - 0x00be: 0x0e1e, # THAI CHARACTER PHO PHAN - 0x00bf: 0x0e1f, # THAI CHARACTER FO FAN - 0x00c0: 0x0e20, # THAI CHARACTER PHO SAMPHAO - 0x00c1: 0x0e21, # THAI CHARACTER MO MA - 0x00c2: 0x0e22, # THAI CHARACTER YO YAK - 0x00c3: 0x0e23, # THAI CHARACTER RO RUA - 0x00c4: 0x0e24, # THAI CHARACTER RU - 0x00c5: 0x0e25, # THAI CHARACTER LO LING - 0x00c6: 0x0e26, # THAI CHARACTER LU - 0x00c7: 0x0e27, # THAI CHARACTER WO WAEN - 0x00c8: 0x0e28, # THAI CHARACTER SO SALA - 0x00c9: 0x0e29, # THAI CHARACTER SO RUSI - 0x00ca: 0x0e2a, # THAI CHARACTER SO SUA - 0x00cb: 0x0e2b, # THAI CHARACTER HO HIP - 0x00cc: 0x0e2c, # THAI CHARACTER LO CHULA - 0x00cd: 0x0e2d, # THAI CHARACTER O ANG - 0x00ce: 0x0e2e, # THAI CHARACTER HO NOKHUK - 0x00cf: 0x0e2f, # THAI CHARACTER PAIYANNOI - 0x00d0: 0x0e30, # THAI CHARACTER SARA A - 0x00d1: 0x0e31, # THAI CHARACTER MAI HAN-AKAT - 0x00d2: 0x0e32, # THAI CHARACTER SARA AA - 0x00d3: 0x0e33, # THAI CHARACTER SARA AM - 0x00d4: 0x0e34, # THAI CHARACTER SARA I - 0x00d5: 0x0e35, # THAI CHARACTER SARA II - 0x00d6: 0x0e36, # THAI CHARACTER SARA UE - 0x00d7: 0x0e37, # THAI CHARACTER SARA UEE - 0x00d8: 0x0e38, # THAI CHARACTER SARA U - 0x00d9: 0x0e39, # THAI CHARACTER SARA UU - 0x00da: 0x0e3a, # THAI CHARACTER PHINTHU - 0x00db: None, - 0x00dc: None, - 0x00dd: None, - 0x00de: None, - 0x00df: 0x0e3f, # THAI CURRENCY SYMBOL BAHT - 0x00e0: 0x0e40, # THAI CHARACTER SARA E - 0x00e1: 0x0e41, # THAI CHARACTER SARA AE - 0x00e2: 0x0e42, # THAI CHARACTER SARA O - 0x00e3: 0x0e43, # THAI CHARACTER SARA AI MAIMUAN - 0x00e4: 0x0e44, # THAI CHARACTER SARA AI MAIMALAI - 0x00e5: 0x0e45, # THAI CHARACTER LAKKHANGYAO - 0x00e6: 0x0e46, # THAI CHARACTER MAIYAMOK - 0x00e7: 0x0e47, # THAI CHARACTER MAITAIKHU - 0x00e8: 0x0e48, # THAI CHARACTER MAI EK - 0x00e9: 0x0e49, # THAI CHARACTER MAI THO - 0x00ea: 0x0e4a, # THAI CHARACTER MAI TRI - 
0x00eb: 0x0e4b, # THAI CHARACTER MAI CHATTAWA - 0x00ec: 0x0e4c, # THAI CHARACTER THANTHAKHAT - 0x00ed: 0x0e4d, # THAI CHARACTER NIKHAHIT - 0x00ee: 0x0e4e, # THAI CHARACTER YAMAKKAN - 0x00ef: 0x0e4f, # THAI CHARACTER FONGMAN - 0x00f0: 0x0e50, # THAI DIGIT ZERO - 0x00f1: 0x0e51, # THAI DIGIT ONE - 0x00f2: 0x0e52, # THAI DIGIT TWO - 0x00f3: 0x0e53, # THAI DIGIT THREE - 0x00f4: 0x0e54, # THAI DIGIT FOUR - 0x00f5: 0x0e55, # THAI DIGIT FIVE - 0x00f6: 0x0e56, # THAI DIGIT SIX - 0x00f7: 0x0e57, # THAI DIGIT SEVEN - 0x00f8: 0x0e58, # THAI DIGIT EIGHT - 0x00f9: 0x0e59, # THAI DIGIT NINE - 0x00fa: 0x0e5a, # THAI CHARACTER ANGKHANKHU - 0x00fb: 0x0e5b, # THAI CHARACTER KHOMUT - 0x00fc: None, - 0x00fd: None, - 0x00fe: None, - 0x00ff: None, + 0x00a1: 0x0e01, # THAI CHARACTER KO KAI + 0x00a2: 0x0e02, # THAI CHARACTER KHO KHAI + 0x00a3: 0x0e03, # THAI CHARACTER KHO KHUAT + 0x00a4: 0x0e04, # THAI CHARACTER KHO KHWAI + 0x00a5: 0x0e05, # THAI CHARACTER KHO KHON + 0x00a6: 0x0e06, # THAI CHARACTER KHO RAKHANG + 0x00a7: 0x0e07, # THAI CHARACTER NGO NGU + 0x00a8: 0x0e08, # THAI CHARACTER CHO CHAN + 0x00a9: 0x0e09, # THAI CHARACTER CHO CHING + 0x00aa: 0x0e0a, # THAI CHARACTER CHO CHANG + 0x00ab: 0x0e0b, # THAI CHARACTER SO SO + 0x00ac: 0x0e0c, # THAI CHARACTER CHO CHOE + 0x00ad: 0x0e0d, # THAI CHARACTER YO YING + 0x00ae: 0x0e0e, # THAI CHARACTER DO CHADA + 0x00af: 0x0e0f, # THAI CHARACTER TO PATAK + 0x00b0: 0x0e10, # THAI CHARACTER THO THAN + 0x00b1: 0x0e11, # THAI CHARACTER THO NANGMONTHO + 0x00b2: 0x0e12, # THAI CHARACTER THO PHUTHAO + 0x00b3: 0x0e13, # THAI CHARACTER NO NEN + 0x00b4: 0x0e14, # THAI CHARACTER DO DEK + 0x00b5: 0x0e15, # THAI CHARACTER TO TAO + 0x00b6: 0x0e16, # THAI CHARACTER THO THUNG + 0x00b7: 0x0e17, # THAI CHARACTER THO THAHAN + 0x00b8: 0x0e18, # THAI CHARACTER THO THONG + 0x00b9: 0x0e19, # THAI CHARACTER NO NU + 0x00ba: 0x0e1a, # THAI CHARACTER BO BAIMAI + 0x00bb: 0x0e1b, # THAI CHARACTER PO PLA + 0x00bc: 0x0e1c, # THAI CHARACTER PHO PHUNG + 0x00bd: 0x0e1d, # THAI 
CHARACTER FO FA + 0x00be: 0x0e1e, # THAI CHARACTER PHO PHAN + 0x00bf: 0x0e1f, # THAI CHARACTER FO FAN + 0x00c0: 0x0e20, # THAI CHARACTER PHO SAMPHAO + 0x00c1: 0x0e21, # THAI CHARACTER MO MA + 0x00c2: 0x0e22, # THAI CHARACTER YO YAK + 0x00c3: 0x0e23, # THAI CHARACTER RO RUA + 0x00c4: 0x0e24, # THAI CHARACTER RU + 0x00c5: 0x0e25, # THAI CHARACTER LO LING + 0x00c6: 0x0e26, # THAI CHARACTER LU + 0x00c7: 0x0e27, # THAI CHARACTER WO WAEN + 0x00c8: 0x0e28, # THAI CHARACTER SO SALA + 0x00c9: 0x0e29, # THAI CHARACTER SO RUSI + 0x00ca: 0x0e2a, # THAI CHARACTER SO SUA + 0x00cb: 0x0e2b, # THAI CHARACTER HO HIP + 0x00cc: 0x0e2c, # THAI CHARACTER LO CHULA + 0x00cd: 0x0e2d, # THAI CHARACTER O ANG + 0x00ce: 0x0e2e, # THAI CHARACTER HO NOKHUK + 0x00cf: 0x0e2f, # THAI CHARACTER PAIYANNOI + 0x00d0: 0x0e30, # THAI CHARACTER SARA A + 0x00d1: 0x0e31, # THAI CHARACTER MAI HAN-AKAT + 0x00d2: 0x0e32, # THAI CHARACTER SARA AA + 0x00d3: 0x0e33, # THAI CHARACTER SARA AM + 0x00d4: 0x0e34, # THAI CHARACTER SARA I + 0x00d5: 0x0e35, # THAI CHARACTER SARA II + 0x00d6: 0x0e36, # THAI CHARACTER SARA UE + 0x00d7: 0x0e37, # THAI CHARACTER SARA UEE + 0x00d8: 0x0e38, # THAI CHARACTER SARA U + 0x00d9: 0x0e39, # THAI CHARACTER SARA UU + 0x00da: 0x0e3a, # THAI CHARACTER PHINTHU + 0x00db: None, + 0x00dc: None, + 0x00dd: None, + 0x00de: None, + 0x00df: 0x0e3f, # THAI CURRENCY SYMBOL BAHT + 0x00e0: 0x0e40, # THAI CHARACTER SARA E + 0x00e1: 0x0e41, # THAI CHARACTER SARA AE + 0x00e2: 0x0e42, # THAI CHARACTER SARA O + 0x00e3: 0x0e43, # THAI CHARACTER SARA AI MAIMUAN + 0x00e4: 0x0e44, # THAI CHARACTER SARA AI MAIMALAI + 0x00e5: 0x0e45, # THAI CHARACTER LAKKHANGYAO + 0x00e6: 0x0e46, # THAI CHARACTER MAIYAMOK + 0x00e7: 0x0e47, # THAI CHARACTER MAITAIKHU + 0x00e8: 0x0e48, # THAI CHARACTER MAI EK + 0x00e9: 0x0e49, # THAI CHARACTER MAI THO + 0x00ea: 0x0e4a, # THAI CHARACTER MAI TRI + 0x00eb: 0x0e4b, # THAI CHARACTER MAI CHATTAWA + 0x00ec: 0x0e4c, # THAI CHARACTER THANTHAKHAT + 0x00ed: 0x0e4d, # THAI CHARACTER NIKHAHIT 
+ 0x00ee: 0x0e4e, # THAI CHARACTER YAMAKKAN + 0x00ef: 0x0e4f, # THAI CHARACTER FONGMAN + 0x00f0: 0x0e50, # THAI DIGIT ZERO + 0x00f1: 0x0e51, # THAI DIGIT ONE + 0x00f2: 0x0e52, # THAI DIGIT TWO + 0x00f3: 0x0e53, # THAI DIGIT THREE + 0x00f4: 0x0e54, # THAI DIGIT FOUR + 0x00f5: 0x0e55, # THAI DIGIT FIVE + 0x00f6: 0x0e56, # THAI DIGIT SIX + 0x00f7: 0x0e57, # THAI DIGIT SEVEN + 0x00f8: 0x0e58, # THAI DIGIT EIGHT + 0x00f9: 0x0e59, # THAI DIGIT NINE + 0x00fa: 0x0e5a, # THAI CHARACTER ANGKHANKHU + 0x00fb: 0x0e5b, # THAI CHARACTER KHOMUT + 0x00fc: None, + 0x00fd: None, + 0x00fe: None, + 0x00ff: None, }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' 
# 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 
0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> <control> + u'\x81' # 0x0081 -> <control> + u'\x82' # 0x0082 -> <control> + u'\x83' # 0x0083 -> <control> + u'\x84' # 0x0084 -> <control> + u'\x85' # 0x0085 -> <control> + u'\x86' # 0x0086 -> <control> + u'\x87' # 0x0087 -> <control> + u'\x88' # 0x0088 -> <control> + u'\x89' # 0x0089 -> <control> + u'\x8a' # 0x008a -> <control> + u'\x8b' # 0x008b -> <control> + u'\x8c' # 0x008c -> <control> + u'\x8d' # 0x008d -> <control> + u'\x8e' # 0x008e -> <control> + u'\x8f' # 0x008f -> 
<control> + u'\x90' # 0x0090 -> <control> + u'\x91' # 0x0091 -> <control> + u'\x92' # 0x0092 -> <control> + u'\x93' # 0x0093 -> <control> + u'\x94' # 0x0094 -> <control> + u'\x95' # 0x0095 -> <control> + u'\x96' # 0x0096 -> <control> + u'\x97' # 0x0097 -> <control> + u'\x98' # 0x0098 -> <control> + u'\x99' # 0x0099 -> <control> + u'\x9a' # 0x009a -> <control> + u'\x9b' # 0x009b -> <control> + u'\x9c' # 0x009c -> <control> + u'\x9d' # 0x009d -> <control> + u'\x9e' # 0x009e -> <control> + u'\x9f' # 0x009f -> <control> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u0e01' # 0x00a1 -> THAI CHARACTER KO KAI + u'\u0e02' # 0x00a2 -> THAI CHARACTER KHO KHAI + u'\u0e03' # 0x00a3 -> THAI CHARACTER KHO KHUAT + u'\u0e04' # 0x00a4 -> THAI CHARACTER KHO KHWAI + u'\u0e05' # 0x00a5 -> THAI CHARACTER KHO KHON + u'\u0e06' # 0x00a6 -> THAI CHARACTER KHO RAKHANG + u'\u0e07' # 0x00a7 -> THAI CHARACTER NGO NGU + u'\u0e08' # 0x00a8 -> THAI CHARACTER CHO CHAN + u'\u0e09' # 0x00a9 -> THAI CHARACTER CHO CHING + u'\u0e0a' # 0x00aa -> THAI CHARACTER CHO CHANG + u'\u0e0b' # 0x00ab -> THAI CHARACTER SO SO + u'\u0e0c' # 0x00ac -> THAI CHARACTER CHO CHOE + u'\u0e0d' # 0x00ad -> THAI CHARACTER YO YING + u'\u0e0e' # 0x00ae -> THAI CHARACTER DO CHADA + u'\u0e0f' # 0x00af -> THAI CHARACTER TO PATAK + u'\u0e10' # 0x00b0 -> THAI CHARACTER THO THAN + u'\u0e11' # 0x00b1 -> THAI CHARACTER THO NANGMONTHO + u'\u0e12' # 0x00b2 -> THAI CHARACTER THO PHUTHAO + u'\u0e13' # 0x00b3 -> THAI CHARACTER NO NEN + u'\u0e14' # 0x00b4 -> THAI CHARACTER DO DEK + u'\u0e15' # 0x00b5 -> THAI CHARACTER TO TAO + u'\u0e16' # 0x00b6 -> THAI CHARACTER THO THUNG + u'\u0e17' # 0x00b7 -> THAI CHARACTER THO THAHAN + u'\u0e18' # 0x00b8 -> THAI CHARACTER THO THONG + u'\u0e19' # 0x00b9 -> THAI CHARACTER NO NU + u'\u0e1a' # 0x00ba -> THAI CHARACTER BO BAIMAI + u'\u0e1b' # 0x00bb -> THAI CHARACTER PO PLA + u'\u0e1c' # 0x00bc -> THAI CHARACTER PHO PHUNG + u'\u0e1d' # 0x00bd -> THAI CHARACTER FO FA + u'\u0e1e' # 0x00be -> THAI CHARACTER PHO PHAN 
+ u'\u0e1f' # 0x00bf -> THAI CHARACTER FO FAN + u'\u0e20' # 0x00c0 -> THAI CHARACTER PHO SAMPHAO + u'\u0e21' # 0x00c1 -> THAI CHARACTER MO MA + u'\u0e22' # 0x00c2 -> THAI CHARACTER YO YAK + u'\u0e23' # 0x00c3 -> THAI CHARACTER RO RUA + u'\u0e24' # 0x00c4 -> THAI CHARACTER RU + u'\u0e25' # 0x00c5 -> THAI CHARACTER LO LING + u'\u0e26' # 0x00c6 -> THAI CHARACTER LU + u'\u0e27' # 0x00c7 -> THAI CHARACTER WO WAEN + u'\u0e28' # 0x00c8 -> THAI CHARACTER SO SALA + u'\u0e29' # 0x00c9 -> THAI CHARACTER SO RUSI + u'\u0e2a' # 0x00ca -> THAI CHARACTER SO SUA + u'\u0e2b' # 0x00cb -> THAI CHARACTER HO HIP + u'\u0e2c' # 0x00cc -> THAI CHARACTER LO CHULA + u'\u0e2d' # 0x00cd -> THAI CHARACTER O ANG + u'\u0e2e' # 0x00ce -> THAI CHARACTER HO NOKHUK + u'\u0e2f' # 0x00cf -> THAI CHARACTER PAIYANNOI + u'\u0e30' # 0x00d0 -> THAI CHARACTER SARA A + u'\u0e31' # 0x00d1 -> THAI CHARACTER MAI HAN-AKAT + u'\u0e32' # 0x00d2 -> THAI CHARACTER SARA AA + u'\u0e33' # 0x00d3 -> THAI CHARACTER SARA AM + u'\u0e34' # 0x00d4 -> THAI CHARACTER SARA I + u'\u0e35' # 0x00d5 -> THAI CHARACTER SARA II + u'\u0e36' # 0x00d6 -> THAI CHARACTER SARA UE + u'\u0e37' # 0x00d7 -> THAI CHARACTER SARA UEE + u'\u0e38' # 0x00d8 -> THAI CHARACTER SARA U + u'\u0e39' # 0x00d9 -> THAI CHARACTER SARA UU + u'\u0e3a' # 0x00da -> THAI CHARACTER PHINTHU + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\u0e3f' # 0x00df -> THAI CURRENCY SYMBOL BAHT + u'\u0e40' # 0x00e0 -> THAI CHARACTER SARA E + u'\u0e41' # 0x00e1 -> THAI CHARACTER SARA AE + u'\u0e42' # 0x00e2 -> THAI CHARACTER SARA O + u'\u0e43' # 0x00e3 -> THAI CHARACTER SARA AI MAIMUAN + u'\u0e44' # 0x00e4 -> THAI CHARACTER SARA AI MAIMALAI + u'\u0e45' # 0x00e5 -> THAI CHARACTER LAKKHANGYAO + u'\u0e46' # 0x00e6 -> THAI CHARACTER MAIYAMOK + u'\u0e47' # 0x00e7 -> THAI CHARACTER MAITAIKHU + u'\u0e48' # 0x00e8 -> THAI CHARACTER MAI EK + u'\u0e49' # 0x00e9 -> THAI CHARACTER MAI THO + u'\u0e4a' # 0x00ea -> THAI CHARACTER MAI TRI + u'\u0e4b' # 0x00eb -> THAI CHARACTER MAI CHATTAWA + 
u'\u0e4c' # 0x00ec -> THAI CHARACTER THANTHAKHAT + u'\u0e4d' # 0x00ed -> THAI CHARACTER NIKHAHIT + u'\u0e4e' # 0x00ee -> THAI CHARACTER YAMAKKAN + u'\u0e4f' # 0x00ef -> THAI CHARACTER FONGMAN + u'\u0e50' # 0x00f0 -> THAI DIGIT ZERO + u'\u0e51' # 0x00f1 -> THAI DIGIT ONE + u'\u0e52' # 0x00f2 -> THAI DIGIT TWO + u'\u0e53' # 0x00f3 -> THAI DIGIT THREE + u'\u0e54' # 0x00f4 -> THAI DIGIT FOUR + u'\u0e55' # 0x00f5 -> THAI DIGIT FIVE + u'\u0e56' # 0x00f6 -> THAI DIGIT SIX + u'\u0e57' # 0x00f7 -> THAI DIGIT SEVEN + u'\u0e58' # 0x00f8 -> THAI DIGIT EIGHT + u'\u0e59' # 0x00f9 -> THAI DIGIT NINE + u'\u0e5a' # 0x00fa -> THAI CHARACTER ANGKHANKHU + u'\u0e5b' # 0x00fb -> THAI CHARACTER KHOMUT + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 
0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN 
CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # <control> + 0x0081: 0x0081, # <control> + 0x0082: 0x0082, # <control> + 0x0083: 0x0083, # <control> + 0x0084: 0x0084, # <control> + 0x0085: 0x0085, # <control> + 0x0086: 0x0086, # <control> + 0x0087: 0x0087, # <control> + 0x0088: 0x0088, # <control> + 0x0089: 0x0089, # <control> + 0x008a: 0x008a, # <control> + 0x008b: 0x008b, # <control> + 0x008c: 0x008c, # <control> + 0x008d: 0x008d, # <control> + 0x008e: 0x008e, # 
<control> + 0x008f: 0x008f, # <control> + 0x0090: 0x0090, # <control> + 0x0091: 0x0091, # <control> + 0x0092: 0x0092, # <control> + 0x0093: 0x0093, # <control> + 0x0094: 0x0094, # <control> + 0x0095: 0x0095, # <control> + 0x0096: 0x0096, # <control> + 0x0097: 0x0097, # <control> + 0x0098: 0x0098, # <control> + 0x0099: 0x0099, # <control> + 0x009a: 0x009a, # <control> + 0x009b: 0x009b, # <control> + 0x009c: 0x009c, # <control> + 0x009d: 0x009d, # <control> + 0x009e: 0x009e, # <control> + 0x009f: 0x009f, # <control> + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x0e01: 0x00a1, # THAI CHARACTER KO KAI + 0x0e02: 0x00a2, # THAI CHARACTER KHO KHAI + 0x0e03: 0x00a3, # THAI CHARACTER KHO KHUAT + 0x0e04: 0x00a4, # THAI CHARACTER KHO KHWAI + 0x0e05: 0x00a5, # THAI CHARACTER KHO KHON + 0x0e06: 0x00a6, # THAI CHARACTER KHO RAKHANG + 0x0e07: 0x00a7, # THAI CHARACTER NGO NGU + 0x0e08: 0x00a8, # THAI CHARACTER CHO CHAN + 0x0e09: 0x00a9, # THAI CHARACTER CHO CHING + 0x0e0a: 0x00aa, # THAI CHARACTER CHO CHANG + 0x0e0b: 0x00ab, # THAI CHARACTER SO SO + 0x0e0c: 0x00ac, # THAI CHARACTER CHO CHOE + 0x0e0d: 0x00ad, # THAI CHARACTER YO YING + 0x0e0e: 0x00ae, # THAI CHARACTER DO CHADA + 0x0e0f: 0x00af, # THAI CHARACTER TO PATAK + 0x0e10: 0x00b0, # THAI CHARACTER THO THAN + 0x0e11: 0x00b1, # THAI CHARACTER THO NANGMONTHO + 0x0e12: 0x00b2, # THAI CHARACTER THO PHUTHAO + 0x0e13: 0x00b3, # THAI CHARACTER NO NEN + 0x0e14: 0x00b4, # THAI CHARACTER DO DEK + 0x0e15: 0x00b5, # THAI CHARACTER TO TAO + 0x0e16: 0x00b6, # THAI CHARACTER THO THUNG + 0x0e17: 0x00b7, # THAI CHARACTER THO THAHAN + 0x0e18: 0x00b8, # THAI CHARACTER THO THONG + 0x0e19: 0x00b9, # THAI CHARACTER NO NU + 0x0e1a: 0x00ba, # THAI CHARACTER BO BAIMAI + 0x0e1b: 0x00bb, # THAI CHARACTER PO PLA + 0x0e1c: 0x00bc, # THAI CHARACTER PHO PHUNG + 0x0e1d: 0x00bd, # THAI CHARACTER FO FA + 0x0e1e: 0x00be, # THAI CHARACTER PHO PHAN + 0x0e1f: 0x00bf, # THAI CHARACTER FO FAN + 0x0e20: 0x00c0, # THAI CHARACTER PHO SAMPHAO + 0x0e21: 0x00c1, # THAI CHARACTER 
MO MA + 0x0e22: 0x00c2, # THAI CHARACTER YO YAK + 0x0e23: 0x00c3, # THAI CHARACTER RO RUA + 0x0e24: 0x00c4, # THAI CHARACTER RU + 0x0e25: 0x00c5, # THAI CHARACTER LO LING + 0x0e26: 0x00c6, # THAI CHARACTER LU + 0x0e27: 0x00c7, # THAI CHARACTER WO WAEN + 0x0e28: 0x00c8, # THAI CHARACTER SO SALA + 0x0e29: 0x00c9, # THAI CHARACTER SO RUSI + 0x0e2a: 0x00ca, # THAI CHARACTER SO SUA + 0x0e2b: 0x00cb, # THAI CHARACTER HO HIP + 0x0e2c: 0x00cc, # THAI CHARACTER LO CHULA + 0x0e2d: 0x00cd, # THAI CHARACTER O ANG + 0x0e2e: 0x00ce, # THAI CHARACTER HO NOKHUK + 0x0e2f: 0x00cf, # THAI CHARACTER PAIYANNOI + 0x0e30: 0x00d0, # THAI CHARACTER SARA A + 0x0e31: 0x00d1, # THAI CHARACTER MAI HAN-AKAT + 0x0e32: 0x00d2, # THAI CHARACTER SARA AA + 0x0e33: 0x00d3, # THAI CHARACTER SARA AM + 0x0e34: 0x00d4, # THAI CHARACTER SARA I + 0x0e35: 0x00d5, # THAI CHARACTER SARA II + 0x0e36: 0x00d6, # THAI CHARACTER SARA UE + 0x0e37: 0x00d7, # THAI CHARACTER SARA UEE + 0x0e38: 0x00d8, # THAI CHARACTER SARA U + 0x0e39: 0x00d9, # THAI CHARACTER SARA UU + 0x0e3a: 0x00da, # THAI CHARACTER PHINTHU + 0x0e3f: 0x00df, # THAI CURRENCY SYMBOL BAHT + 0x0e40: 0x00e0, # THAI CHARACTER SARA E + 0x0e41: 0x00e1, # THAI CHARACTER SARA AE + 0x0e42: 0x00e2, # THAI CHARACTER SARA O + 0x0e43: 0x00e3, # THAI CHARACTER SARA AI MAIMUAN + 0x0e44: 0x00e4, # THAI CHARACTER SARA AI MAIMALAI + 0x0e45: 0x00e5, # THAI CHARACTER LAKKHANGYAO + 0x0e46: 0x00e6, # THAI CHARACTER MAIYAMOK + 0x0e47: 0x00e7, # THAI CHARACTER MAITAIKHU + 0x0e48: 0x00e8, # THAI CHARACTER MAI EK + 0x0e49: 0x00e9, # THAI CHARACTER MAI THO + 0x0e4a: 0x00ea, # THAI CHARACTER MAI TRI + 0x0e4b: 0x00eb, # THAI CHARACTER MAI CHATTAWA + 0x0e4c: 0x00ec, # THAI CHARACTER THANTHAKHAT + 0x0e4d: 0x00ed, # THAI CHARACTER NIKHAHIT + 0x0e4e: 0x00ee, # THAI CHARACTER YAMAKKAN + 0x0e4f: 0x00ef, # THAI CHARACTER FONGMAN + 0x0e50: 0x00f0, # THAI DIGIT ZERO + 0x0e51: 0x00f1, # THAI DIGIT ONE + 0x0e52: 0x00f2, # THAI DIGIT TWO + 0x0e53: 0x00f3, # THAI DIGIT THREE + 0x0e54: 0x00f4, 
# THAI DIGIT FOUR + 0x0e55: 0x00f5, # THAI DIGIT FIVE + 0x0e56: 0x00f6, # THAI DIGIT SIX + 0x0e57: 0x00f7, # THAI DIGIT SEVEN + 0x0e58: 0x00f8, # THAI DIGIT EIGHT + 0x0e59: 0x00f9, # THAI DIGIT NINE + 0x0e5a: 0x00fa, # THAI CHARACTER ANGKHANKHU + 0x0e5b: 0x00fb, # THAI CHARACTER KHOMUT +}
\ No newline at end of file diff --git a/Lib/encodings/iso8859_13.py b/Lib/encodings/iso8859_13.py index 57b6c70..14129bc 100644 --- a/Lib/encodings/iso8859_13.py +++ b/Lib/encodings/iso8859_13.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-13.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-13.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,64 +32,582 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00a5: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00aa: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x00af: 0x00c6, # LATIN CAPITAL LETTER AE - 0x00b4: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00b8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x00ba: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA - 0x00bf: 0x00e6, # LATIN SMALL LETTER AE - 0x00c0: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00c1: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK - 0x00c2: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON - 0x00c3: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x00c6: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00c7: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON - 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00ca: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x00cb: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x00cc: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x00cd: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x00ce: 0x012a, # LATIN 
CAPITAL LETTER I WITH MACRON - 0x00cf: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x00d0: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00d2: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x00d4: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON - 0x00d8: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK - 0x00d9: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x00da: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x00db: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON - 0x00dd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00de: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00e0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00e1: 0x012f, # LATIN SMALL LETTER I WITH OGONEK - 0x00e2: 0x0101, # LATIN SMALL LETTER A WITH MACRON - 0x00e3: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x00e6: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00e7: 0x0113, # LATIN SMALL LETTER E WITH MACRON - 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00ea: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00eb: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x00ec: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA - 0x00ed: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA - 0x00ee: 0x012b, # LATIN SMALL LETTER I WITH MACRON - 0x00ef: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA - 0x00f0: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00f2: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA - 0x00f4: 0x014d, # LATIN SMALL LETTER O WITH MACRON - 0x00f8: 0x0173, # LATIN SMALL LETTER U WITH OGONEK - 0x00f9: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x00fa: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x00fb: 0x016b, # LATIN SMALL LETTER U WITH MACRON - 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00ff: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00a1: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00a5: 0x201e, # DOUBLE LOW-9 QUOTATION 
MARK + 0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00aa: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x00af: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00b4: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00b8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00ba: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA + 0x00bf: 0x00e6, # LATIN SMALL LETTER AE + 0x00c0: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00c1: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK + 0x00c2: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON + 0x00c3: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x00c6: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00c7: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON + 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00ca: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x00cb: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x00cc: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x00cd: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x00ce: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON + 0x00cf: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x00d0: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00d2: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x00d4: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON + 0x00d8: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK + 0x00d9: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE + 0x00da: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x00db: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON + 0x00dd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00de: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00e0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00e1: 0x012f, # LATIN SMALL LETTER I WITH OGONEK + 0x00e2: 0x0101, # LATIN SMALL LETTER A WITH MACRON + 0x00e3: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x00e6: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00e7: 0x0113, # LATIN SMALL LETTER E WITH MACRON + 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON + 
0x00ea: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x00eb: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x00ec: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA + 0x00ed: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA + 0x00ee: 0x012b, # LATIN SMALL LETTER I WITH MACRON + 0x00ef: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA + 0x00f0: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00f2: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA + 0x00f4: 0x014d, # LATIN SMALL LETTER O WITH MACRON + 0x00f8: 0x0173, # LATIN SMALL LETTER U WITH OGONEK + 0x00f9: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x00fa: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x00fb: 0x016b, # LATIN SMALL LETTER U WITH MACRON + 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00ff: 0x2019, # RIGHT SINGLE QUOTATION MARK }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' 
# 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> <control> + u'\x81' # 0x0081 -> <control> + u'\x82' # 0x0082 -> <control> + u'\x83' # 0x0083 -> <control> + u'\x84' # 0x0084 -> <control> + u'\x85' # 0x0085 -> <control> + u'\x86' # 0x0086 -> <control> + u'\x87' # 0x0087 -> <control> + u'\x88' # 0x0088 -> <control> + u'\x89' # 0x0089 -> <control> + u'\x8a' # 0x008a -> <control> + u'\x8b' # 0x008b -> <control> + u'\x8c' # 0x008c -> <control> + u'\x8d' # 0x008d -> <control> + u'\x8e' # 0x008e -> <control> + u'\x8f' # 0x008f -> <control> + u'\x90' # 0x0090 -> <control> + u'\x91' # 0x0091 -> <control> + u'\x92' # 0x0092 -> <control> + u'\x93' # 0x0093 -> <control> + u'\x94' # 0x0094 -> <control> + u'\x95' # 0x0095 -> <control> + u'\x96' # 0x0096 -> <control> + u'\x97' # 0x0097 -> <control> + u'\x98' # 0x0098 -> <control> + u'\x99' # 0x0099 -> <control> + u'\x9a' # 0x009a -> <control> + u'\x9b' # 0x009b -> <control> + u'\x9c' # 0x009c -> <control> + u'\x9d' # 0x009d -> <control> + u'\x9e' # 0x009e -> <control> + u'\x9f' # 0x009f -> <control> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u201d' # 0x00a1 -> RIGHT DOUBLE QUOTATION MARK + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\u201e' # 0x00a5 -> DOUBLE LOW-9 QUOTATION MARK + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xd8' # 0x00a8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xa9' # 0x00a9 -> COPYRIGHT 
SIGN + u'\u0156' # 0x00aa -> LATIN CAPITAL LETTER R WITH CEDILLA + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\xc6' # 0x00af -> LATIN CAPITAL LETTER AE + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\u201c' # 0x00b4 -> LEFT DOUBLE QUOTATION MARK + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\xf8' # 0x00b8 -> LATIN SMALL LETTER O WITH STROKE + u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE + u'\u0157' # 0x00ba -> LATIN SMALL LETTER R WITH CEDILLA + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0x00bc -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\xbe' # 0x00be -> VULGAR FRACTION THREE QUARTERS + u'\xe6' # 0x00bf -> LATIN SMALL LETTER AE + u'\u0104' # 0x00c0 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u012e' # 0x00c1 -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u0100' # 0x00c2 -> LATIN CAPITAL LETTER A WITH MACRON + u'\u0106' # 0x00c3 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x00c5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\u0118' # 0x00c6 -> LATIN CAPITAL LETTER E WITH OGONEK + u'\u0112' # 0x00c7 -> LATIN CAPITAL LETTER E WITH MACRON + u'\u010c' # 0x00c8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0179' # 0x00ca -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\u0116' # 0x00cb -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\u0122' # 0x00cc -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\u0136' # 0x00cd -> LATIN CAPITAL LETTER K WITH CEDILLA + u'\u012a' # 0x00ce -> LATIN CAPITAL LETTER I WITH MACRON + u'\u013b' # 0x00cf -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\u0160' # 0x00d0 -> LATIN CAPITAL LETTER S WITH CARON + 
u'\u0143' # 0x00d1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\u0145' # 0x00d2 -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\u014c' # 0x00d4 -> LATIN CAPITAL LETTER O WITH MACRON + u'\xd5' # 0x00d5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\u0172' # 0x00d8 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\u0141' # 0x00d9 -> LATIN CAPITAL LETTER L WITH STROKE + u'\u015a' # 0x00da -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u016a' # 0x00db -> LATIN CAPITAL LETTER U WITH MACRON + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u017b' # 0x00dd -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\u017d' # 0x00de -> LATIN CAPITAL LETTER Z WITH CARON + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S (German) + u'\u0105' # 0x00e0 -> LATIN SMALL LETTER A WITH OGONEK + u'\u012f' # 0x00e1 -> LATIN SMALL LETTER I WITH OGONEK + u'\u0101' # 0x00e2 -> LATIN SMALL LETTER A WITH MACRON + u'\u0107' # 0x00e3 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0x00e5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\u0119' # 0x00e6 -> LATIN SMALL LETTER E WITH OGONEK + u'\u0113' # 0x00e7 -> LATIN SMALL LETTER E WITH MACRON + u'\u010d' # 0x00e8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u017a' # 0x00ea -> LATIN SMALL LETTER Z WITH ACUTE + u'\u0117' # 0x00eb -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\u0123' # 0x00ec -> LATIN SMALL LETTER G WITH CEDILLA + u'\u0137' # 0x00ed -> LATIN SMALL LETTER K WITH CEDILLA + u'\u012b' # 0x00ee -> LATIN SMALL LETTER I WITH MACRON + u'\u013c' # 0x00ef -> LATIN SMALL LETTER L WITH CEDILLA + u'\u0161' # 0x00f0 -> LATIN SMALL LETTER S WITH CARON + u'\u0144' # 0x00f1 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0146' # 0x00f2 -> LATIN SMALL LETTER N WITH CEDILLA + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH 
ACUTE + u'\u014d' # 0x00f4 -> LATIN SMALL LETTER O WITH MACRON + u'\xf5' # 0x00f5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x00f7 -> DIVISION SIGN + u'\u0173' # 0x00f8 -> LATIN SMALL LETTER U WITH OGONEK + u'\u0142' # 0x00f9 -> LATIN SMALL LETTER L WITH STROKE + u'\u015b' # 0x00fa -> LATIN SMALL LETTER S WITH ACUTE + u'\u016b' # 0x00fb -> LATIN SMALL LETTER U WITH MACRON + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u017c' # 0x00fd -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u017e' # 0x00fe -> LATIN SMALL LETTER Z WITH CARON + u'\u2019' # 0x00ff -> RIGHT SINGLE QUOTATION MARK +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 
0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 
0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # <control> + 0x0081: 0x0081, # <control> + 0x0082: 0x0082, # <control> + 0x0083: 0x0083, # <control> + 0x0084: 0x0084, # <control> + 0x0085: 0x0085, # <control> + 0x0086: 0x0086, # <control> + 0x0087: 0x0087, # <control> + 0x0088: 0x0088, # <control> + 0x0089: 0x0089, # <control> + 0x008a: 0x008a, # <control> + 0x008b: 0x008b, # <control> + 0x008c: 0x008c, # <control> + 0x008d: 0x008d, # <control> + 0x008e: 0x008e, # <control> + 0x008f: 0x008f, # <control> + 0x0090: 0x0090, # <control> + 
0x0091: 0x0091, # <control> + 0x0092: 0x0092, # <control> + 0x0093: 0x0093, # <control> + 0x0094: 0x0094, # <control> + 0x0095: 0x0095, # <control> + 0x0096: 0x0096, # <control> + 0x0097: 0x0097, # <control> + 0x0098: 0x0098, # <control> + 0x0099: 0x0099, # <control> + 0x009a: 0x009a, # <control> + 0x009b: 0x009b, # <control> + 0x009c: 0x009c, # <control> + 0x009d: 0x009d, # <control> + 0x009e: 0x009e, # <control> + 0x009f: 0x009f, # <control> + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b9: 0x00b9, # SUPERSCRIPT ONE + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00be: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00af, # LATIN CAPITAL LETTER AE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00d8: 0x00a8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S (German) + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x00e5, # LATIN SMALL 
LETTER A WITH RING ABOVE + 0x00e6: 0x00bf, # LATIN SMALL LETTER AE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f5: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00f8: 0x00b8, # LATIN SMALL LETTER O WITH STROKE + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0100: 0x00c2, # LATIN CAPITAL LETTER A WITH MACRON + 0x0101: 0x00e2, # LATIN SMALL LETTER A WITH MACRON + 0x0104: 0x00c0, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0x00e0, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0x00c3, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0x00e3, # LATIN SMALL LETTER C WITH ACUTE + 0x010c: 0x00c8, # LATIN CAPITAL LETTER C WITH CARON + 0x010d: 0x00e8, # LATIN SMALL LETTER C WITH CARON + 0x0112: 0x00c7, # LATIN CAPITAL LETTER E WITH MACRON + 0x0113: 0x00e7, # LATIN SMALL LETTER E WITH MACRON + 0x0116: 0x00cb, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x0117: 0x00eb, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x0118: 0x00c6, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0x00e6, # LATIN SMALL LETTER E WITH OGONEK + 0x0122: 0x00cc, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0123: 0x00ec, # LATIN SMALL LETTER G WITH CEDILLA + 0x012a: 0x00ce, # LATIN CAPITAL LETTER I WITH MACRON + 0x012b: 0x00ee, # LATIN SMALL LETTER I WITH MACRON + 0x012e: 0x00c1, # LATIN CAPITAL LETTER I WITH OGONEK + 0x012f: 0x00e1, # LATIN SMALL LETTER I WITH OGONEK + 0x0136: 0x00cd, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x0137: 0x00ed, # LATIN SMALL LETTER K WITH CEDILLA + 0x013b: 0x00cf, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x013c: 0x00ef, # LATIN SMALL LETTER L WITH CEDILLA + 0x0141: 0x00d9, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0x00f9, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0x00d1, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0x00f1, # LATIN SMALL LETTER N WITH ACUTE + 0x0145: 0x00d2, # LATIN CAPITAL LETTER N WITH CEDILLA + 
0x0146: 0x00f2, # LATIN SMALL LETTER N WITH CEDILLA + 0x014c: 0x00d4, # LATIN CAPITAL LETTER O WITH MACRON + 0x014d: 0x00f4, # LATIN SMALL LETTER O WITH MACRON + 0x0156: 0x00aa, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x0157: 0x00ba, # LATIN SMALL LETTER R WITH CEDILLA + 0x015a: 0x00da, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015b: 0x00fa, # LATIN SMALL LETTER S WITH ACUTE + 0x0160: 0x00d0, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x00f0, # LATIN SMALL LETTER S WITH CARON + 0x016a: 0x00db, # LATIN CAPITAL LETTER U WITH MACRON + 0x016b: 0x00fb, # LATIN SMALL LETTER U WITH MACRON + 0x0172: 0x00d8, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0x00f8, # LATIN SMALL LETTER U WITH OGONEK + 0x0179: 0x00ca, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017a: 0x00ea, # LATIN SMALL LETTER Z WITH ACUTE + 0x017b: 0x00dd, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017c: 0x00fd, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017d: 0x00de, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x00fe, # LATIN SMALL LETTER Z WITH CARON + 0x2019: 0x00ff, # RIGHT SINGLE QUOTATION MARK + 0x201c: 0x00b4, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x00a1, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x00a5, # DOUBLE LOW-9 QUOTATION MARK +}
\ No newline at end of file diff --git a/Lib/encodings/iso8859_14.py b/Lib/encodings/iso8859_14.py index 9f3d6fe..2bc8137 100644 --- a/Lib/encodings/iso8859_14.py +++ b/Lib/encodings/iso8859_14.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-14.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-14.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,39 +32,557 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x1e02, # LATIN CAPITAL LETTER B WITH DOT ABOVE - 0x00a2: 0x1e03, # LATIN SMALL LETTER B WITH DOT ABOVE - 0x00a4: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE - 0x00a5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE - 0x00a6: 0x1e0a, # LATIN CAPITAL LETTER D WITH DOT ABOVE - 0x00a8: 0x1e80, # LATIN CAPITAL LETTER W WITH GRAVE - 0x00aa: 0x1e82, # LATIN CAPITAL LETTER W WITH ACUTE - 0x00ab: 0x1e0b, # LATIN SMALL LETTER D WITH DOT ABOVE - 0x00ac: 0x1ef2, # LATIN CAPITAL LETTER Y WITH GRAVE - 0x00af: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x00b0: 0x1e1e, # LATIN CAPITAL LETTER F WITH DOT ABOVE - 0x00b1: 0x1e1f, # LATIN SMALL LETTER F WITH DOT ABOVE - 0x00b2: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE - 0x00b3: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE - 0x00b4: 0x1e40, # LATIN CAPITAL LETTER M WITH DOT ABOVE - 0x00b5: 0x1e41, # LATIN SMALL LETTER M WITH DOT ABOVE - 0x00b7: 0x1e56, # LATIN CAPITAL LETTER P WITH DOT ABOVE - 0x00b8: 0x1e81, # LATIN SMALL LETTER W WITH GRAVE - 0x00b9: 0x1e57, # LATIN SMALL LETTER P WITH DOT ABOVE - 0x00ba: 
0x1e83, # LATIN SMALL LETTER W WITH ACUTE - 0x00bb: 0x1e60, # LATIN CAPITAL LETTER S WITH DOT ABOVE - 0x00bc: 0x1ef3, # LATIN SMALL LETTER Y WITH GRAVE - 0x00bd: 0x1e84, # LATIN CAPITAL LETTER W WITH DIAERESIS - 0x00be: 0x1e85, # LATIN SMALL LETTER W WITH DIAERESIS - 0x00bf: 0x1e61, # LATIN SMALL LETTER S WITH DOT ABOVE - 0x00d0: 0x0174, # LATIN CAPITAL LETTER W WITH CIRCUMFLEX - 0x00d7: 0x1e6a, # LATIN CAPITAL LETTER T WITH DOT ABOVE - 0x00de: 0x0176, # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX - 0x00f0: 0x0175, # LATIN SMALL LETTER W WITH CIRCUMFLEX - 0x00f7: 0x1e6b, # LATIN SMALL LETTER T WITH DOT ABOVE - 0x00fe: 0x0177, # LATIN SMALL LETTER Y WITH CIRCUMFLEX + 0x00a1: 0x1e02, # LATIN CAPITAL LETTER B WITH DOT ABOVE + 0x00a2: 0x1e03, # LATIN SMALL LETTER B WITH DOT ABOVE + 0x00a4: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE + 0x00a5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE + 0x00a6: 0x1e0a, # LATIN CAPITAL LETTER D WITH DOT ABOVE + 0x00a8: 0x1e80, # LATIN CAPITAL LETTER W WITH GRAVE + 0x00aa: 0x1e82, # LATIN CAPITAL LETTER W WITH ACUTE + 0x00ab: 0x1e0b, # LATIN SMALL LETTER D WITH DOT ABOVE + 0x00ac: 0x1ef2, # LATIN CAPITAL LETTER Y WITH GRAVE + 0x00af: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x00b0: 0x1e1e, # LATIN CAPITAL LETTER F WITH DOT ABOVE + 0x00b1: 0x1e1f, # LATIN SMALL LETTER F WITH DOT ABOVE + 0x00b2: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE + 0x00b3: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE + 0x00b4: 0x1e40, # LATIN CAPITAL LETTER M WITH DOT ABOVE + 0x00b5: 0x1e41, # LATIN SMALL LETTER M WITH DOT ABOVE + 0x00b7: 0x1e56, # LATIN CAPITAL LETTER P WITH DOT ABOVE + 0x00b8: 0x1e81, # LATIN SMALL LETTER W WITH GRAVE + 0x00b9: 0x1e57, # LATIN SMALL LETTER P WITH DOT ABOVE + 0x00ba: 0x1e83, # LATIN SMALL LETTER W WITH ACUTE + 0x00bb: 0x1e60, # LATIN CAPITAL LETTER S WITH DOT ABOVE + 0x00bc: 0x1ef3, # LATIN SMALL LETTER Y WITH GRAVE + 0x00bd: 0x1e84, # LATIN CAPITAL LETTER W WITH DIAERESIS + 0x00be: 0x1e85, # LATIN SMALL LETTER W 
WITH DIAERESIS + 0x00bf: 0x1e61, # LATIN SMALL LETTER S WITH DOT ABOVE + 0x00d0: 0x0174, # LATIN CAPITAL LETTER W WITH CIRCUMFLEX + 0x00d7: 0x1e6a, # LATIN CAPITAL LETTER T WITH DOT ABOVE + 0x00de: 0x0176, # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX + 0x00f0: 0x0175, # LATIN SMALL LETTER W WITH CIRCUMFLEX + 0x00f7: 0x1e6b, # LATIN SMALL LETTER T WITH DOT ABOVE + 0x00fe: 0x0177, # LATIN SMALL LETTER Y WITH CIRCUMFLEX }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' 
# 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 
0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> <control> + u'\x81' # 0x0081 -> <control> + u'\x82' # 0x0082 -> <control> + u'\x83' # 0x0083 -> <control> + u'\x84' # 0x0084 -> <control> + u'\x85' # 0x0085 -> <control> + u'\x86' # 0x0086 -> <control> + u'\x87' # 0x0087 -> <control> + u'\x88' # 0x0088 -> <control> + u'\x89' # 0x0089 -> <control> + u'\x8a' # 0x008a -> <control> + u'\x8b' # 0x008b -> <control> + u'\x8c' # 0x008c -> <control> + u'\x8d' # 0x008d -> <control> + u'\x8e' # 0x008e -> <control> + u'\x8f' # 0x008f -> 
<control> + u'\x90' # 0x0090 -> <control> + u'\x91' # 0x0091 -> <control> + u'\x92' # 0x0092 -> <control> + u'\x93' # 0x0093 -> <control> + u'\x94' # 0x0094 -> <control> + u'\x95' # 0x0095 -> <control> + u'\x96' # 0x0096 -> <control> + u'\x97' # 0x0097 -> <control> + u'\x98' # 0x0098 -> <control> + u'\x99' # 0x0099 -> <control> + u'\x9a' # 0x009a -> <control> + u'\x9b' # 0x009b -> <control> + u'\x9c' # 0x009c -> <control> + u'\x9d' # 0x009d -> <control> + u'\x9e' # 0x009e -> <control> + u'\x9f' # 0x009f -> <control> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u1e02' # 0x00a1 -> LATIN CAPITAL LETTER B WITH DOT ABOVE + u'\u1e03' # 0x00a2 -> LATIN SMALL LETTER B WITH DOT ABOVE + u'\xa3' # 0x00a3 -> POUND SIGN + u'\u010a' # 0x00a4 -> LATIN CAPITAL LETTER C WITH DOT ABOVE + u'\u010b' # 0x00a5 -> LATIN SMALL LETTER C WITH DOT ABOVE + u'\u1e0a' # 0x00a6 -> LATIN CAPITAL LETTER D WITH DOT ABOVE + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\u1e80' # 0x00a8 -> LATIN CAPITAL LETTER W WITH GRAVE + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u1e82' # 0x00aa -> LATIN CAPITAL LETTER W WITH ACUTE + u'\u1e0b' # 0x00ab -> LATIN SMALL LETTER D WITH DOT ABOVE + u'\u1ef2' # 0x00ac -> LATIN CAPITAL LETTER Y WITH GRAVE + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\u0178' # 0x00af -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u1e1e' # 0x00b0 -> LATIN CAPITAL LETTER F WITH DOT ABOVE + u'\u1e1f' # 0x00b1 -> LATIN SMALL LETTER F WITH DOT ABOVE + u'\u0120' # 0x00b2 -> LATIN CAPITAL LETTER G WITH DOT ABOVE + u'\u0121' # 0x00b3 -> LATIN SMALL LETTER G WITH DOT ABOVE + u'\u1e40' # 0x00b4 -> LATIN CAPITAL LETTER M WITH DOT ABOVE + u'\u1e41' # 0x00b5 -> LATIN SMALL LETTER M WITH DOT ABOVE + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\u1e56' # 0x00b7 -> LATIN CAPITAL LETTER P WITH DOT ABOVE + u'\u1e81' # 0x00b8 -> LATIN SMALL LETTER W WITH GRAVE + u'\u1e57' # 0x00b9 -> LATIN SMALL LETTER P WITH DOT ABOVE + u'\u1e83' # 0x00ba -> LATIN SMALL LETTER W WITH ACUTE + u'\u1e60' # 0x00bb -> 
LATIN CAPITAL LETTER S WITH DOT ABOVE + u'\u1ef3' # 0x00bc -> LATIN SMALL LETTER Y WITH GRAVE + u'\u1e84' # 0x00bd -> LATIN CAPITAL LETTER W WITH DIAERESIS + u'\u1e85' # 0x00be -> LATIN SMALL LETTER W WITH DIAERESIS + u'\u1e61' # 0x00bf -> LATIN SMALL LETTER S WITH DOT ABOVE + u'\xc0' # 0x00c0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0x00c3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x00c5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0x00c6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0x00c7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0x00c8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x00ca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0x00cc -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00cf -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u0174' # 0x00d0 -> LATIN CAPITAL LETTER W WITH CIRCUMFLEX + u'\xd1' # 0x00d1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0x00d2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0x00d5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\u1e6a' # 0x00d7 -> LATIN CAPITAL LETTER T WITH DOT ABOVE + u'\xd8' # 0x00d8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0x00d9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00db -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0x00dd -> LATIN CAPITAL LETTER 
Y WITH ACUTE + u'\u0176' # 0x00de -> LATIN CAPITAL LETTER Y WITH CIRCUMFLEX + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0x00e0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0x00e3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0x00e5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0x00e6 -> LATIN SMALL LETTER AE + u'\xe7' # 0x00e7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0x00e8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x00ea -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x00ef -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u0175' # 0x00f0 -> LATIN SMALL LETTER W WITH CIRCUMFLEX + u'\xf1' # 0x00f1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0x00f2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0x00f5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\u1e6b' # 0x00f7 -> LATIN SMALL LETTER T WITH DOT ABOVE + u'\xf8' # 0x00f8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0x00f9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0x00fb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0x00fd -> LATIN SMALL LETTER Y WITH ACUTE + u'\u0177' # 0x00fe -> LATIN SMALL LETTER Y WITH CIRCUMFLEX + u'\xff' # 0x00ff -> LATIN SMALL LETTER Y WITH DIAERESIS +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) 
+encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 
0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 
0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # <control> + 0x0081: 0x0081, # <control> + 0x0082: 0x0082, # <control> + 0x0083: 0x0083, # <control> + 0x0084: 0x0084, # <control> + 0x0085: 0x0085, # <control> + 0x0086: 0x0086, # <control> + 0x0087: 0x0087, # <control> + 0x0088: 0x0088, # <control> + 0x0089: 0x0089, # <control> + 0x008a: 0x008a, # <control> + 0x008b: 0x008b, # <control> + 0x008c: 0x008c, # <control> + 0x008d: 0x008d, # <control> + 0x008e: 0x008e, # <control> + 0x008f: 0x008f, # <control> + 0x0090: 0x0090, # <control> + 0x0091: 0x0091, # <control> + 0x0092: 0x0092, # <control> + 0x0093: 0x0093, # <control> + 0x0094: 0x0094, # <control> + 0x0095: 0x0095, # <control> + 0x0096: 0x0096, # <control> + 0x0097: 0x0097, # <control> + 0x0098: 0x0098, # <control> + 0x0099: 0x0099, # <control> + 0x009a: 0x009a, # <control> + 0x009b: 0x009b, # <control> + 0x009c: 0x009c, # <control> + 0x009d: 0x009d, # <control> + 0x009e: 0x009e, # <control> + 0x009f: 0x009f, # <control> + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a3: 0x00a3, # POUND SIGN + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00c0: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 
0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00c7: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d1: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00e6, # LATIN SMALL LETTER AE + 0x00e7: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x00e8, # LATIN SMALL LETTER 
E WITH GRAVE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00ff: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x010a: 0x00a4, # LATIN CAPITAL LETTER C WITH DOT ABOVE + 0x010b: 0x00a5, # LATIN SMALL LETTER C WITH DOT ABOVE + 0x0120: 0x00b2, # LATIN CAPITAL LETTER G WITH DOT ABOVE + 0x0121: 0x00b3, # LATIN SMALL LETTER G WITH DOT ABOVE + 0x0174: 0x00d0, # LATIN CAPITAL LETTER W WITH CIRCUMFLEX + 0x0175: 0x00f0, # LATIN SMALL LETTER W WITH CIRCUMFLEX + 0x0176: 0x00de, # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX + 0x0177: 0x00fe, # LATIN SMALL LETTER Y WITH CIRCUMFLEX + 0x0178: 0x00af, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x1e02: 0x00a1, # LATIN CAPITAL LETTER B WITH DOT ABOVE + 0x1e03: 0x00a2, # LATIN SMALL LETTER B WITH DOT ABOVE + 0x1e0a: 0x00a6, # LATIN CAPITAL LETTER D WITH DOT ABOVE + 0x1e0b: 0x00ab, # LATIN SMALL LETTER D WITH DOT ABOVE + 0x1e1e: 0x00b0, # LATIN CAPITAL LETTER F WITH DOT ABOVE + 0x1e1f: 0x00b1, # LATIN SMALL LETTER F WITH DOT ABOVE + 0x1e40: 0x00b4, # LATIN CAPITAL LETTER M 
WITH DOT ABOVE + 0x1e41: 0x00b5, # LATIN SMALL LETTER M WITH DOT ABOVE + 0x1e56: 0x00b7, # LATIN CAPITAL LETTER P WITH DOT ABOVE + 0x1e57: 0x00b9, # LATIN SMALL LETTER P WITH DOT ABOVE + 0x1e60: 0x00bb, # LATIN CAPITAL LETTER S WITH DOT ABOVE + 0x1e61: 0x00bf, # LATIN SMALL LETTER S WITH DOT ABOVE + 0x1e6a: 0x00d7, # LATIN CAPITAL LETTER T WITH DOT ABOVE + 0x1e6b: 0x00f7, # LATIN SMALL LETTER T WITH DOT ABOVE + 0x1e80: 0x00a8, # LATIN CAPITAL LETTER W WITH GRAVE + 0x1e81: 0x00b8, # LATIN SMALL LETTER W WITH GRAVE + 0x1e82: 0x00aa, # LATIN CAPITAL LETTER W WITH ACUTE + 0x1e83: 0x00ba, # LATIN SMALL LETTER W WITH ACUTE + 0x1e84: 0x00bd, # LATIN CAPITAL LETTER W WITH DIAERESIS + 0x1e85: 0x00be, # LATIN SMALL LETTER W WITH DIAERESIS + 0x1ef2: 0x00ac, # LATIN CAPITAL LETTER Y WITH GRAVE + 0x1ef3: 0x00bc, # LATIN SMALL LETTER Y WITH GRAVE +}
\ No newline at end of file diff --git a/Lib/encodings/iso8859_15.py b/Lib/encodings/iso8859_15.py index 5708df0..b032b4e 100644 --- a/Lib/encodings/iso8859_15.py +++ b/Lib/encodings/iso8859_15.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-15.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-15.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,16 +32,534 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a4: 0x20ac, # EURO SIGN - 0x00a6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00a8: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00b4: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00b8: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00bc: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x00bd: 0x0153, # LATIN SMALL LIGATURE OE - 0x00be: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x00a4: 0x20ac, # EURO SIGN + 0x00a6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00a8: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00b4: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00b8: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00bc: 0x0152, # LATIN CAPITAL LIGATURE OE + 0x00bd: 0x0153, # LATIN SMALL LIGATURE OE + 0x00be: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 
-> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> <control> + u'\x81' # 0x0081 -> <control> + u'\x82' # 0x0082 -> <control> + u'\x83' # 0x0083 -> <control> + u'\x84' # 0x0084 -> <control> + u'\x85' # 0x0085 -> <control> + u'\x86' # 0x0086 -> <control> + u'\x87' # 0x0087 -> <control> + u'\x88' # 0x0088 -> <control> + u'\x89' # 0x0089 -> <control> + u'\x8a' # 0x008a -> <control> + u'\x8b' # 0x008b -> <control> + u'\x8c' # 0x008c -> <control> + u'\x8d' # 0x008d -> <control> + u'\x8e' # 0x008e -> <control> + u'\x8f' # 0x008f -> <control> + u'\x90' # 0x0090 -> <control> + u'\x91' # 0x0091 -> <control> + u'\x92' # 0x0092 -> <control> + u'\x93' # 0x0093 -> <control> + u'\x94' # 0x0094 -> <control> + u'\x95' # 0x0095 -> <control> + u'\x96' # 0x0096 -> <control> + u'\x97' # 0x0097 -> <control> + u'\x98' # 0x0098 -> <control> + u'\x99' # 0x0099 -> <control> + u'\x9a' # 0x009a -> <control> + u'\x9b' # 0x009b -> <control> + u'\x9c' # 0x009c -> <control> + u'\x9d' # 0x009d -> <control> + u'\x9e' # 0x009e -> <control> + u'\x9f' # 0x009f -> <control> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\xa1' # 0x00a1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\u20ac' # 0x00a4 -> EURO SIGN + u'\xa5' # 0x00a5 -> YEN SIGN + u'\u0160' # 0x00a6 -> LATIN CAPITAL LETTER S WITH CARON + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\u0161' # 0x00a8 -> LATIN SMALL LETTER S WITH CARON + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + 
u'\xaa' # 0x00aa -> FEMININE ORDINAL INDICATOR + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\xaf' # 0x00af -> MACRON + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\u017d' # 0x00b4 -> LATIN CAPITAL LETTER Z WITH CARON + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\u017e' # 0x00b8 -> LATIN SMALL LETTER Z WITH CARON + u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE + u'\xba' # 0x00ba -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u0152' # 0x00bc -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0x00bd -> LATIN SMALL LIGATURE OE + u'\u0178' # 0x00be -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\xbf' # 0x00bf -> INVERTED QUESTION MARK + u'\xc0' # 0x00c0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0x00c3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x00c5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0x00c6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0x00c7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0x00c8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x00ca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0x00cc -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00cf -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xd0' # 0x00d0 -> LATIN CAPITAL LETTER ETH + u'\xd1' # 0x00d1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' 
# 0x00d2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0x00d5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\xd8' # 0x00d8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0x00d9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00db -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0x00dd -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0x00de -> LATIN CAPITAL LETTER THORN + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0x00e0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0x00e3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0x00e5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0x00e6 -> LATIN SMALL LETTER AE + u'\xe7' # 0x00e7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0x00e8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x00ea -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x00ef -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf0' # 0x00f0 -> LATIN SMALL LETTER ETH + u'\xf1' # 0x00f1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0x00f2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0x00f5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0x00f6 -> 
LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x00f7 -> DIVISION SIGN + u'\xf8' # 0x00f8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0x00f9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0x00fb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0x00fd -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0x00fe -> LATIN SMALL LETTER THORN + u'\xff' # 0x00ff -> LATIN SMALL LETTER Y WITH DIAERESIS +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT 
PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET 
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # <control> + 0x0081: 0x0081, # <control> + 0x0082: 0x0082, # <control> + 0x0083: 0x0083, # <control> + 0x0084: 0x0084, # <control> + 0x0085: 0x0085, # <control> + 0x0086: 0x0086, # <control> + 0x0087: 0x0087, # <control> + 0x0088: 0x0088, # <control> + 0x0089: 0x0089, # <control> + 0x008a: 0x008a, # <control> + 0x008b: 0x008b, # <control> + 0x008c: 0x008c, # <control> + 0x008d: 0x008d, # <control> + 0x008e: 0x008e, # <control> + 0x008f: 0x008f, # <control> + 0x0090: 0x0090, # <control> + 0x0091: 0x0091, # <control> + 0x0092: 0x0092, # <control> + 0x0093: 0x0093, # <control> + 0x0094: 0x0094, # <control> + 0x0095: 0x0095, # <control> + 
0x0096: 0x0096, # <control> + 0x0097: 0x0097, # <control> + 0x0098: 0x0098, # <control> + 0x0099: 0x0099, # <control> + 0x009a: 0x009a, # <control> + 0x009b: 0x009b, # <control> + 0x009c: 0x009c, # <control> + 0x009d: 0x009d, # <control> + 0x009e: 0x009e, # <control> + 0x009f: 0x009f, # <control> + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a1: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a5: 0x00a5, # YEN SIGN + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00aa: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00af: 0x00af, # MACRON + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b9: 0x00b9, # SUPERSCRIPT ONE + 0x00ba: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bf: 0x00bf, # INVERTED QUESTION MARK + 0x00c0: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00c7: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN 
CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d0: 0x00d0, # LATIN CAPITAL LETTER ETH + 0x00d1: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00d8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00de: 0x00de, # LATIN CAPITAL LETTER THORN + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00e6, # LATIN SMALL LETTER AE + 0x00e7: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f0: 0x00f0, # LATIN SMALL LETTER ETH + 0x00f1: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 
0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00f8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0x00fe, # LATIN SMALL LETTER THORN + 0x00ff: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0152: 0x00bc, # LATIN CAPITAL LIGATURE OE + 0x0153: 0x00bd, # LATIN SMALL LIGATURE OE + 0x0160: 0x00a6, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x00a8, # LATIN SMALL LETTER S WITH CARON + 0x0178: 0x00be, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x017d: 0x00b4, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x00b8, # LATIN SMALL LETTER Z WITH CARON + 0x20ac: 0x00a4, # EURO SIGN +}
\ No newline at end of file diff --git a/Lib/encodings/iso8859_16.py b/Lib/encodings/iso8859_16.py index 80c0740..1763279 100644 --- a/Lib/encodings/iso8859_16.py +++ b/Lib/encodings/iso8859_16.py @@ -1,7 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-16.TXT' with gencodec.py. - - Generated from mapping found in - ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-16.TXT +""" Python Character Mapping Codec generated from 'ISO8859/8859-16.TXT' with gencodec.py. """#" @@ -17,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -35,48 +32,566 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00a2: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x00a4: 0x20ac, # EURO SIGN - 0x00a5: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00a6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00a8: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00aa: 0x0218, # LATIN CAPITAL LETTER S WITH COMMA BELOW - 0x00ac: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x00ae: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00b2: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00b3: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x00b4: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00b5: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00b8: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00b9: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00ba: 0x0219, # LATIN SMALL LETTER S WITH COMMA BELOW - 0x00bc: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x00bd: 0x0153, # LATIN SMALL LIGATURE OE - 0x00be: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH 
DOT ABOVE - 0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE - 0x00c5: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE - 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00d5: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x00d7: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x00d8: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x00dd: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00de: 0x021a, # LATIN CAPITAL LETTER T WITH COMMA BELOW - 0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE - 0x00e5: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE - 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00f5: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x00f7: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x00f8: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x00fd: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00fe: 0x021b, # LATIN SMALL LETTER T WITH COMMA BELOW + 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00a2: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE + 0x00a4: 0x20ac, # EURO SIGN + 0x00a5: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x00a6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00a8: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00aa: 0x0218, # LATIN CAPITAL LETTER S WITH COMMA BELOW + 0x00ac: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x00ae: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00b2: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00b3: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x00b4: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00b5: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00b8: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00b9: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00ba: 0x0219, # LATIN SMALL LETTER S WITH COMMA BELOW + 0x00bc: 0x0152, # LATIN CAPITAL LIGATURE OE + 
0x00bd: 0x0153, # LATIN SMALL LIGATURE OE + 0x00be: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE + 0x00c5: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE + 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00d5: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x00d7: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x00d8: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x00dd: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00de: 0x021a, # LATIN CAPITAL LETTER T WITH COMMA BELOW + 0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE + 0x00e5: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE + 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00f5: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x00f7: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x00f8: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x00fd: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00fe: 0x021b, # LATIN SMALL LETTER T WITH COMMA BELOW }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + 
u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> <control> + u'\x81' # 0x0081 -> <control> + u'\x82' # 0x0082 -> <control> + u'\x83' # 0x0083 -> <control> + u'\x84' # 0x0084 -> <control> + u'\x85' # 0x0085 -> <control> + u'\x86' # 0x0086 -> <control> + u'\x87' # 0x0087 -> <control> + u'\x88' # 0x0088 -> <control> + u'\x89' # 0x0089 -> <control> + u'\x8a' # 0x008a -> <control> + u'\x8b' # 0x008b -> <control> + u'\x8c' # 0x008c -> <control> + u'\x8d' # 0x008d -> <control> + u'\x8e' # 0x008e -> <control> + u'\x8f' # 0x008f -> <control> + u'\x90' # 0x0090 -> <control> + u'\x91' # 0x0091 -> <control> + u'\x92' # 0x0092 -> <control> + u'\x93' # 0x0093 -> <control> + u'\x94' # 0x0094 -> <control> + u'\x95' # 0x0095 -> <control> + u'\x96' # 0x0096 -> <control> + u'\x97' # 0x0097 -> <control> + u'\x98' # 0x0098 -> <control> + u'\x99' # 0x0099 -> <control> + u'\x9a' # 0x009a -> <control> + u'\x9b' # 0x009b -> <control> + u'\x9c' # 0x009c -> <control> + u'\x9d' # 0x009d -> <control> + u'\x9e' # 0x009e -> <control> + u'\x9f' # 0x009f -> <control> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u0104' # 0x00a1 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u0105' # 0x00a2 -> LATIN SMALL LETTER A WITH OGONEK + u'\u0141' # 0x00a3 -> LATIN CAPITAL LETTER L WITH STROKE + u'\u20ac' # 0x00a4 -> EURO SIGN + u'\u201e' # 0x00a5 -> DOUBLE LOW-9 QUOTATION MARK + u'\u0160' # 0x00a6 -> LATIN CAPITAL LETTER S WITH CARON + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\u0161' 
# 0x00a8 -> LATIN SMALL LETTER S WITH CARON + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u0218' # 0x00aa -> LATIN CAPITAL LETTER S WITH COMMA BELOW + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u0179' # 0x00ac -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\u017a' # 0x00ae -> LATIN SMALL LETTER Z WITH ACUTE + u'\u017b' # 0x00af -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\u010c' # 0x00b2 -> LATIN CAPITAL LETTER C WITH CARON + u'\u0142' # 0x00b3 -> LATIN SMALL LETTER L WITH STROKE + u'\u017d' # 0x00b4 -> LATIN CAPITAL LETTER Z WITH CARON + u'\u201d' # 0x00b5 -> RIGHT DOUBLE QUOTATION MARK + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\u017e' # 0x00b8 -> LATIN SMALL LETTER Z WITH CARON + u'\u010d' # 0x00b9 -> LATIN SMALL LETTER C WITH CARON + u'\u0219' # 0x00ba -> LATIN SMALL LETTER S WITH COMMA BELOW + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u0152' # 0x00bc -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0x00bd -> LATIN SMALL LIGATURE OE + u'\u0178' # 0x00be -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u017c' # 0x00bf -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\xc0' # 0x00c0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u0102' # 0x00c3 -> LATIN CAPITAL LETTER A WITH BREVE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u0106' # 0x00c5 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc6' # 0x00c6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0x00c7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0x00c8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x00ca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0x00cc -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 
0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00cf -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u0110' # 0x00d0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u0143' # 0x00d1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\xd2' # 0x00d2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0150' # 0x00d5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\u015a' # 0x00d7 -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u0170' # 0x00d8 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + u'\xd9' # 0x00d9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00db -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0118' # 0x00dd -> LATIN CAPITAL LETTER E WITH OGONEK + u'\u021a' # 0x00de -> LATIN CAPITAL LETTER T WITH COMMA BELOW + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0x00e0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\u0103' # 0x00e3 -> LATIN SMALL LETTER A WITH BREVE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u0107' # 0x00e5 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe6' # 0x00e6 -> LATIN SMALL LETTER AE + u'\xe7' # 0x00e7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0x00e8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x00ea -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x00ef -> LATIN SMALL LETTER I WITH DIAERESIS + 
u'\u0111' # 0x00f0 -> LATIN SMALL LETTER D WITH STROKE + u'\u0144' # 0x00f1 -> LATIN SMALL LETTER N WITH ACUTE + u'\xf2' # 0x00f2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u0151' # 0x00f5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\u015b' # 0x00f7 -> LATIN SMALL LETTER S WITH ACUTE + u'\u0171' # 0x00f8 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE + u'\xf9' # 0x00f9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0x00fb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u0119' # 0x00fd -> LATIN SMALL LETTER E WITH OGONEK + u'\u021b' # 0x00fe -> LATIN SMALL LETTER T WITH COMMA BELOW + u'\xff' # 0x00ff -> LATIN SMALL LETTER Y WITH DIAERESIS +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE 
SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, 
# LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # <control> + 0x0081: 0x0081, # <control> + 0x0082: 0x0082, # <control> + 0x0083: 0x0083, # <control> + 0x0084: 0x0084, # <control> + 0x0085: 0x0085, # <control> + 0x0086: 0x0086, # <control> + 0x0087: 0x0087, # <control> + 0x0088: 0x0088, # 
<control> + 0x0089: 0x0089, # <control> + 0x008a: 0x008a, # <control> + 0x008b: 0x008b, # <control> + 0x008c: 0x008c, # <control> + 0x008d: 0x008d, # <control> + 0x008e: 0x008e, # <control> + 0x008f: 0x008f, # <control> + 0x0090: 0x0090, # <control> + 0x0091: 0x0091, # <control> + 0x0092: 0x0092, # <control> + 0x0093: 0x0093, # <control> + 0x0094: 0x0094, # <control> + 0x0095: 0x0095, # <control> + 0x0096: 0x0096, # <control> + 0x0097: 0x0097, # <control> + 0x0098: 0x0098, # <control> + 0x0099: 0x0099, # <control> + 0x009a: 0x009a, # <control> + 0x009b: 0x009b, # <control> + 0x009c: 0x009c, # <control> + 0x009d: 0x009d, # <control> + 0x009e: 0x009e, # <control> + 0x009f: 0x009f, # <control> + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c0: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c6: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00c7: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d2: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00d4, # 
LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d9: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e6: 0x00e6, # LATIN SMALL LETTER AE + 0x00e7: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f2: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f9: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0102: 0x00c3, # LATIN CAPITAL LETTER A WITH BREVE + 0x0103: 0x00e3, # LATIN SMALL LETTER A WITH BREVE + 0x0104: 0x00a1, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0x00a2, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0x00c5, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0x00e5, # LATIN SMALL LETTER C WITH ACUTE + 0x010c: 0x00b2, # LATIN CAPITAL LETTER C WITH CARON + 0x010d: 0x00b9, # 
LATIN SMALL LETTER C WITH CARON + 0x0110: 0x00d0, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0x00f0, # LATIN SMALL LETTER D WITH STROKE + 0x0118: 0x00dd, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0x00fd, # LATIN SMALL LETTER E WITH OGONEK + 0x0141: 0x00a3, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0x00b3, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0x00d1, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0x00f1, # LATIN SMALL LETTER N WITH ACUTE + 0x0150: 0x00d5, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x0151: 0x00f5, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x0152: 0x00bc, # LATIN CAPITAL LIGATURE OE + 0x0153: 0x00bd, # LATIN SMALL LIGATURE OE + 0x015a: 0x00d7, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015b: 0x00f7, # LATIN SMALL LETTER S WITH ACUTE + 0x0160: 0x00a6, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x00a8, # LATIN SMALL LETTER S WITH CARON + 0x0170: 0x00d8, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x0171: 0x00f8, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x0178: 0x00be, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0179: 0x00ac, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017a: 0x00ae, # LATIN SMALL LETTER Z WITH ACUTE + 0x017b: 0x00af, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017c: 0x00bf, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017d: 0x00b4, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x00b8, # LATIN SMALL LETTER Z WITH CARON + 0x0218: 0x00aa, # LATIN CAPITAL LETTER S WITH COMMA BELOW + 0x0219: 0x00ba, # LATIN SMALL LETTER S WITH COMMA BELOW + 0x021a: 0x00de, # LATIN CAPITAL LETTER T WITH COMMA BELOW + 0x021b: 0x00fe, # LATIN SMALL LETTER T WITH COMMA BELOW + 0x201d: 0x00b5, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x00a5, # DOUBLE LOW-9 QUOTATION MARK + 0x20ac: 0x00a4, # EURO SIGN +}
\ No newline at end of file diff --git a/Lib/encodings/iso8859_2.py b/Lib/encodings/iso8859_2.py index c9ac2c2..4093f02 100644 --- a/Lib/encodings/iso8859_2.py +++ b/Lib/encodings/iso8859_2.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-2.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-2.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,65 +32,583 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00a2: 0x02d8, # BREVE - 0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x00a5: 0x013d, # LATIN CAPITAL LETTER L WITH CARON - 0x00a6: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x00a9: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x00ab: 0x0164, # LATIN CAPITAL LETTER T WITH CARON - 0x00ac: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x00ae: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00b1: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00b2: 0x02db, # OGONEK - 0x00b3: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x00b5: 0x013e, # LATIN SMALL LETTER L WITH CARON - 0x00b6: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x00b7: 0x02c7, # CARON - 0x00b9: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA - 0x00bb: 0x0165, # LATIN SMALL LETTER T WITH CARON - 0x00bc: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00bd: 0x02dd, # DOUBLE ACUTE ACCENT - 
0x00be: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00c0: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE - 0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE - 0x00c5: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE - 0x00c6: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00cc: 0x011a, # LATIN CAPITAL LETTER E WITH CARON - 0x00cf: 0x010e, # LATIN CAPITAL LETTER D WITH CARON - 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE - 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00d2: 0x0147, # LATIN CAPITAL LETTER N WITH CARON - 0x00d5: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x00d8: 0x0158, # LATIN CAPITAL LETTER R WITH CARON - 0x00d9: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x00db: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x00de: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA - 0x00e0: 0x0155, # LATIN SMALL LETTER R WITH ACUTE - 0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE - 0x00e5: 0x013a, # LATIN SMALL LETTER L WITH ACUTE - 0x00e6: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00ec: 0x011b, # LATIN SMALL LETTER E WITH CARON - 0x00ef: 0x010f, # LATIN SMALL LETTER D WITH CARON - 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE - 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00f2: 0x0148, # LATIN SMALL LETTER N WITH CARON - 0x00f5: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x00f8: 0x0159, # LATIN SMALL LETTER R WITH CARON - 0x00f9: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE - 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA - 0x00ff: 0x02d9, # DOT ABOVE + 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00a2: 0x02d8, # BREVE + 0x00a3: 0x0141, # LATIN CAPITAL LETTER 
L WITH STROKE + 0x00a5: 0x013d, # LATIN CAPITAL LETTER L WITH CARON + 0x00a6: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x00a9: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x00ab: 0x0164, # LATIN CAPITAL LETTER T WITH CARON + 0x00ac: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x00ae: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00b1: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00b2: 0x02db, # OGONEK + 0x00b3: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x00b5: 0x013e, # LATIN SMALL LETTER L WITH CARON + 0x00b6: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x00b7: 0x02c7, # CARON + 0x00b9: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00bb: 0x0165, # LATIN SMALL LETTER T WITH CARON + 0x00bc: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x00bd: 0x02dd, # DOUBLE ACUTE ACCENT + 0x00be: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00c0: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE + 0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE + 0x00c5: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE + 0x00c6: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00cc: 0x011a, # LATIN CAPITAL LETTER E WITH CARON + 0x00cf: 0x010e, # LATIN CAPITAL LETTER D WITH CARON + 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE + 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00d2: 0x0147, # LATIN CAPITAL LETTER N WITH CARON + 0x00d5: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x00d8: 0x0158, # LATIN CAPITAL LETTER R WITH CARON + 0x00d9: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x00db: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x00de: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA + 0x00e0: 0x0155, # LATIN SMALL LETTER 
R WITH ACUTE + 0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE + 0x00e5: 0x013a, # LATIN SMALL LETTER L WITH ACUTE + 0x00e6: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00ec: 0x011b, # LATIN SMALL LETTER E WITH CARON + 0x00ef: 0x010f, # LATIN SMALL LETTER D WITH CARON + 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE + 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00f2: 0x0148, # LATIN SMALL LETTER N WITH CARON + 0x00f5: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x00f8: 0x0159, # LATIN SMALL LETTER R WITH CARON + 0x00f9: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE + 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA + 0x00ff: 0x02d9, # DOT ABOVE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> 
RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> <control> + u'\x81' # 0x0081 -> <control> + u'\x82' # 0x0082 -> <control> + u'\x83' # 0x0083 -> <control> + u'\x84' # 0x0084 -> <control> + u'\x85' # 0x0085 -> <control> + u'\x86' # 0x0086 -> <control> + u'\x87' # 0x0087 -> <control> + u'\x88' # 0x0088 -> <control> + u'\x89' # 0x0089 -> <control> + u'\x8a' # 0x008a -> <control> + u'\x8b' # 0x008b -> <control> + u'\x8c' # 0x008c -> <control> + u'\x8d' # 0x008d -> <control> + u'\x8e' # 0x008e -> <control> + u'\x8f' # 0x008f -> <control> + u'\x90' # 0x0090 -> <control> + u'\x91' # 0x0091 -> <control> + u'\x92' # 0x0092 -> <control> + u'\x93' # 0x0093 -> <control> + u'\x94' # 0x0094 -> <control> + u'\x95' # 0x0095 -> <control> + u'\x96' # 0x0096 -> <control> + u'\x97' # 0x0097 -> <control> + u'\x98' # 0x0098 -> <control> + u'\x99' # 0x0099 -> <control> + u'\x9a' # 0x009a -> <control> + u'\x9b' # 0x009b -> <control> + u'\x9c' # 0x009c -> <control> + u'\x9d' # 0x009d -> <control> + u'\x9e' # 0x009e -> <control> + u'\x9f' # 0x009f -> <control> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u0104' # 0x00a1 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u02d8' # 0x00a2 -> BREVE + u'\u0141' # 0x00a3 -> LATIN CAPITAL LETTER L WITH STROKE + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\u013d' # 0x00a5 -> LATIN CAPITAL LETTER L WITH CARON + u'\u015a' # 0x00a6 -> LATIN CAPITAL LETTER S WITH ACUTE + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> 
DIAERESIS + u'\u0160' # 0x00a9 -> LATIN CAPITAL LETTER S WITH CARON + u'\u015e' # 0x00aa -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\u0164' # 0x00ab -> LATIN CAPITAL LETTER T WITH CARON + u'\u0179' # 0x00ac -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\u017d' # 0x00ae -> LATIN CAPITAL LETTER Z WITH CARON + u'\u017b' # 0x00af -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\u0105' # 0x00b1 -> LATIN SMALL LETTER A WITH OGONEK + u'\u02db' # 0x00b2 -> OGONEK + u'\u0142' # 0x00b3 -> LATIN SMALL LETTER L WITH STROKE + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\u013e' # 0x00b5 -> LATIN SMALL LETTER L WITH CARON + u'\u015b' # 0x00b6 -> LATIN SMALL LETTER S WITH ACUTE + u'\u02c7' # 0x00b7 -> CARON + u'\xb8' # 0x00b8 -> CEDILLA + u'\u0161' # 0x00b9 -> LATIN SMALL LETTER S WITH CARON + u'\u015f' # 0x00ba -> LATIN SMALL LETTER S WITH CEDILLA + u'\u0165' # 0x00bb -> LATIN SMALL LETTER T WITH CARON + u'\u017a' # 0x00bc -> LATIN SMALL LETTER Z WITH ACUTE + u'\u02dd' # 0x00bd -> DOUBLE ACUTE ACCENT + u'\u017e' # 0x00be -> LATIN SMALL LETTER Z WITH CARON + u'\u017c' # 0x00bf -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u0154' # 0x00c0 -> LATIN CAPITAL LETTER R WITH ACUTE + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u0102' # 0x00c3 -> LATIN CAPITAL LETTER A WITH BREVE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u0139' # 0x00c5 -> LATIN CAPITAL LETTER L WITH ACUTE + u'\u0106' # 0x00c6 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc7' # 0x00c7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\u010c' # 0x00c8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0118' # 0x00ca -> LATIN CAPITAL LETTER E WITH OGONEK + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u011a' # 0x00cc -> LATIN CAPITAL LETTER E WITH CARON + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 
0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\u010e' # 0x00cf -> LATIN CAPITAL LETTER D WITH CARON + u'\u0110' # 0x00d0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u0143' # 0x00d1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\u0147' # 0x00d2 -> LATIN CAPITAL LETTER N WITH CARON + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0150' # 0x00d5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\u0158' # 0x00d8 -> LATIN CAPITAL LETTER R WITH CARON + u'\u016e' # 0x00d9 -> LATIN CAPITAL LETTER U WITH RING ABOVE + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\u0170' # 0x00db -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0x00dd -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\u0162' # 0x00de -> LATIN CAPITAL LETTER T WITH CEDILLA + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S + u'\u0155' # 0x00e0 -> LATIN SMALL LETTER R WITH ACUTE + u'\xe1' # 0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\u0103' # 0x00e3 -> LATIN SMALL LETTER A WITH BREVE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u013a' # 0x00e5 -> LATIN SMALL LETTER L WITH ACUTE + u'\u0107' # 0x00e6 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe7' # 0x00e7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\u010d' # 0x00e8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u0119' # 0x00ea -> LATIN SMALL LETTER E WITH OGONEK + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u011b' # 0x00ec -> LATIN SMALL LETTER E WITH CARON + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u010f' # 0x00ef -> LATIN SMALL LETTER D WITH CARON + u'\u0111' # 0x00f0 -> LATIN SMALL LETTER D WITH STROKE + 
u'\u0144' # 0x00f1 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0148' # 0x00f2 -> LATIN SMALL LETTER N WITH CARON + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u0151' # 0x00f5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x00f7 -> DIVISION SIGN + u'\u0159' # 0x00f8 -> LATIN SMALL LETTER R WITH CARON + u'\u016f' # 0x00f9 -> LATIN SMALL LETTER U WITH RING ABOVE + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\u0171' # 0x00fb -> LATIN SMALL LETTER U WITH DOUBLE ACUTE + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0x00fd -> LATIN SMALL LETTER Y WITH ACUTE + u'\u0163' # 0x00fe -> LATIN SMALL LETTER T WITH CEDILLA + u'\u02d9' # 0x00ff -> DOT ABOVE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # 
UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL 
LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # <control> + 0x0081: 0x0081, # <control> + 0x0082: 0x0082, # <control> + 0x0083: 0x0083, # <control> + 0x0084: 0x0084, # <control> + 0x0085: 0x0085, # <control> + 0x0086: 0x0086, # <control> + 0x0087: 0x0087, # <control> + 0x0088: 0x0088, # <control> + 0x0089: 0x0089, # <control> + 0x008a: 0x008a, # <control> + 0x008b: 0x008b, # <control> + 0x008c: 
0x008c, # <control> + 0x008d: 0x008d, # <control> + 0x008e: 0x008e, # <control> + 0x008f: 0x008f, # <control> + 0x0090: 0x0090, # <control> + 0x0091: 0x0091, # <control> + 0x0092: 0x0092, # <control> + 0x0093: 0x0093, # <control> + 0x0094: 0x0094, # <control> + 0x0095: 0x0095, # <control> + 0x0096: 0x0096, # <control> + 0x0097: 0x0097, # <control> + 0x0098: 0x0098, # <control> + 0x0099: 0x0099, # <control> + 0x009a: 0x009a, # <control> + 0x009b: 0x009b, # <control> + 0x009c: 0x009c, # <control> + 0x009d: 0x009d, # <control> + 0x009e: 0x009e, # <control> + 0x009f: 0x009f, # <control> + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b8: 0x00b8, # CEDILLA + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c7: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e7: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 
0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00fa: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x0102: 0x00c3, # LATIN CAPITAL LETTER A WITH BREVE + 0x0103: 0x00e3, # LATIN SMALL LETTER A WITH BREVE + 0x0104: 0x00a1, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0x00b1, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0x00c6, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0x00e6, # LATIN SMALL LETTER C WITH ACUTE + 0x010c: 0x00c8, # LATIN CAPITAL LETTER C WITH CARON + 0x010d: 0x00e8, # LATIN SMALL LETTER C WITH CARON + 0x010e: 0x00cf, # LATIN CAPITAL LETTER D WITH CARON + 0x010f: 0x00ef, # LATIN SMALL LETTER D WITH CARON + 0x0110: 0x00d0, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0x00f0, # LATIN SMALL LETTER D WITH STROKE + 0x0118: 0x00ca, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0x00ea, # LATIN SMALL LETTER E WITH OGONEK + 0x011a: 0x00cc, # LATIN CAPITAL LETTER E WITH CARON + 0x011b: 0x00ec, # LATIN SMALL LETTER E WITH CARON + 0x0139: 0x00c5, # LATIN CAPITAL LETTER L WITH ACUTE + 0x013a: 0x00e5, # LATIN SMALL LETTER L WITH ACUTE + 0x013d: 0x00a5, # LATIN CAPITAL LETTER L WITH CARON + 0x013e: 0x00b5, # LATIN SMALL LETTER L WITH CARON + 0x0141: 0x00a3, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0x00b3, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0x00d1, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0x00f1, # LATIN SMALL LETTER N WITH ACUTE + 0x0147: 0x00d2, # LATIN CAPITAL LETTER N WITH CARON + 0x0148: 0x00f2, # LATIN SMALL LETTER N WITH CARON + 0x0150: 0x00d5, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x0151: 0x00f5, # LATIN 
SMALL LETTER O WITH DOUBLE ACUTE + 0x0154: 0x00c0, # LATIN CAPITAL LETTER R WITH ACUTE + 0x0155: 0x00e0, # LATIN SMALL LETTER R WITH ACUTE + 0x0158: 0x00d8, # LATIN CAPITAL LETTER R WITH CARON + 0x0159: 0x00f8, # LATIN SMALL LETTER R WITH CARON + 0x015a: 0x00a6, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015b: 0x00b6, # LATIN SMALL LETTER S WITH ACUTE + 0x015e: 0x00aa, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015f: 0x00ba, # LATIN SMALL LETTER S WITH CEDILLA + 0x0160: 0x00a9, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x00b9, # LATIN SMALL LETTER S WITH CARON + 0x0162: 0x00de, # LATIN CAPITAL LETTER T WITH CEDILLA + 0x0163: 0x00fe, # LATIN SMALL LETTER T WITH CEDILLA + 0x0164: 0x00ab, # LATIN CAPITAL LETTER T WITH CARON + 0x0165: 0x00bb, # LATIN SMALL LETTER T WITH CARON + 0x016e: 0x00d9, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x016f: 0x00f9, # LATIN SMALL LETTER U WITH RING ABOVE + 0x0170: 0x00db, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x0171: 0x00fb, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x0179: 0x00ac, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017a: 0x00bc, # LATIN SMALL LETTER Z WITH ACUTE + 0x017b: 0x00af, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017c: 0x00bf, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017d: 0x00ae, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x00be, # LATIN SMALL LETTER Z WITH CARON + 0x02c7: 0x00b7, # CARON + 0x02d8: 0x00a2, # BREVE + 0x02d9: 0x00ff, # DOT ABOVE + 0x02db: 0x00b2, # OGONEK + 0x02dd: 0x00bd, # DOUBLE ACUTE ACCENT +}
\ No newline at end of file diff --git a/Lib/encodings/iso8859_3.py b/Lib/encodings/iso8859_3.py index 776423e..4cf4f41 100644 --- a/Lib/encodings/iso8859_3.py +++ b/Lib/encodings/iso8859_3.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-3.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-3.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,43 +32,554 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x0126, # LATIN CAPITAL LETTER H WITH STROKE - 0x00a2: 0x02d8, # BREVE - 0x00a5: None, - 0x00a6: 0x0124, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX - 0x00a9: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x00ab: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE - 0x00ac: 0x0134, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX - 0x00ae: None, - 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00b1: 0x0127, # LATIN SMALL LETTER H WITH STROKE - 0x00b6: 0x0125, # LATIN SMALL LETTER H WITH CIRCUMFLEX - 0x00b9: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA - 0x00bb: 0x011f, # LATIN SMALL LETTER G WITH BREVE - 0x00bc: 0x0135, # LATIN SMALL LETTER J WITH CIRCUMFLEX - 0x00be: None, - 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00c3: None, - 0x00c5: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE - 0x00c6: 0x0108, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX - 0x00d0: None, - 0x00d5: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE - 0x00d8: 0x011c, # LATIN CAPITAL 
LETTER G WITH CIRCUMFLEX - 0x00dd: 0x016c, # LATIN CAPITAL LETTER U WITH BREVE - 0x00de: 0x015c, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX - 0x00e3: None, - 0x00e5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE - 0x00e6: 0x0109, # LATIN SMALL LETTER C WITH CIRCUMFLEX - 0x00f0: None, - 0x00f5: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE - 0x00f8: 0x011d, # LATIN SMALL LETTER G WITH CIRCUMFLEX - 0x00fd: 0x016d, # LATIN SMALL LETTER U WITH BREVE - 0x00fe: 0x015d, # LATIN SMALL LETTER S WITH CIRCUMFLEX - 0x00ff: 0x02d9, # DOT ABOVE + 0x00a1: 0x0126, # LATIN CAPITAL LETTER H WITH STROKE + 0x00a2: 0x02d8, # BREVE + 0x00a5: None, + 0x00a6: 0x0124, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX + 0x00a9: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x00ab: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE + 0x00ac: 0x0134, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX + 0x00ae: None, + 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00b1: 0x0127, # LATIN SMALL LETTER H WITH STROKE + 0x00b6: 0x0125, # LATIN SMALL LETTER H WITH CIRCUMFLEX + 0x00b9: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00bb: 0x011f, # LATIN SMALL LETTER G WITH BREVE + 0x00bc: 0x0135, # LATIN SMALL LETTER J WITH CIRCUMFLEX + 0x00be: None, + 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00c3: None, + 0x00c5: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE + 0x00c6: 0x0108, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX + 0x00d0: None, + 0x00d5: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE + 0x00d8: 0x011c, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX + 0x00dd: 0x016c, # LATIN CAPITAL LETTER U WITH BREVE + 0x00de: 0x015c, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX + 0x00e3: None, + 0x00e5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE + 0x00e6: 0x0109, # LATIN SMALL LETTER C WITH CIRCUMFLEX + 0x00f0: None, + 0x00f5: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE + 0x00f8: 0x011d, # LATIN SMALL 
LETTER G WITH CIRCUMFLEX + 0x00fd: 0x016d, # LATIN SMALL LETTER U WITH BREVE + 0x00fe: 0x015d, # LATIN SMALL LETTER S WITH CIRCUMFLEX + 0x00ff: 0x02d9, # DOT ABOVE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> <control> + u'\x81' # 0x0081 -> <control> + u'\x82' # 0x0082 -> <control> + u'\x83' # 0x0083 -> <control> + u'\x84' # 0x0084 -> <control> + u'\x85' # 0x0085 -> <control> + u'\x86' # 0x0086 -> <control> + u'\x87' # 0x0087 -> <control> + u'\x88' # 0x0088 -> <control> + u'\x89' # 0x0089 -> <control> + u'\x8a' # 0x008a -> <control> + u'\x8b' # 0x008b -> <control> + u'\x8c' # 0x008c -> <control> + u'\x8d' # 0x008d -> <control> + u'\x8e' # 0x008e -> <control> + u'\x8f' # 0x008f -> <control> + u'\x90' # 0x0090 -> <control> + u'\x91' # 0x0091 -> <control> + u'\x92' # 0x0092 -> <control> + u'\x93' # 0x0093 -> <control> + u'\x94' # 0x0094 -> <control> + u'\x95' # 0x0095 -> <control> + u'\x96' # 0x0096 -> <control> + u'\x97' # 0x0097 -> <control> + u'\x98' # 0x0098 -> <control> + u'\x99' # 0x0099 -> <control> + u'\x9a' # 0x009a -> <control> + u'\x9b' # 0x009b -> <control> + u'\x9c' # 0x009c -> 
<control> + u'\x9d' # 0x009d -> <control> + u'\x9e' # 0x009e -> <control> + u'\x9f' # 0x009f -> <control> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u0126' # 0x00a1 -> LATIN CAPITAL LETTER H WITH STROKE + u'\u02d8' # 0x00a2 -> BREVE + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\ufffe' + u'\u0124' # 0x00a6 -> LATIN CAPITAL LETTER H WITH CIRCUMFLEX + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\u0130' # 0x00a9 -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'\u015e' # 0x00aa -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\u011e' # 0x00ab -> LATIN CAPITAL LETTER G WITH BREVE + u'\u0134' # 0x00ac -> LATIN CAPITAL LETTER J WITH CIRCUMFLEX + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\ufffe' + u'\u017b' # 0x00af -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\u0127' # 0x00b1 -> LATIN SMALL LETTER H WITH STROKE + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\u0125' # 0x00b6 -> LATIN SMALL LETTER H WITH CIRCUMFLEX + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\xb8' # 0x00b8 -> CEDILLA + u'\u0131' # 0x00b9 -> LATIN SMALL LETTER DOTLESS I + u'\u015f' # 0x00ba -> LATIN SMALL LETTER S WITH CEDILLA + u'\u011f' # 0x00bb -> LATIN SMALL LETTER G WITH BREVE + u'\u0135' # 0x00bc -> LATIN SMALL LETTER J WITH CIRCUMFLEX + u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\ufffe' + u'\u017c' # 0x00bf -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\xc0' # 0x00c0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\ufffe' + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u010a' # 0x00c5 -> LATIN CAPITAL LETTER C WITH DOT ABOVE + u'\u0108' # 0x00c6 -> LATIN CAPITAL LETTER C WITH CIRCUMFLEX + u'\xc7' # 0x00c7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0x00c8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 
0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x00ca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0x00cc -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00cf -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\ufffe' + u'\xd1' # 0x00d1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0x00d2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0120' # 0x00d5 -> LATIN CAPITAL LETTER G WITH DOT ABOVE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\u011c' # 0x00d8 -> LATIN CAPITAL LETTER G WITH CIRCUMFLEX + u'\xd9' # 0x00d9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00db -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u016c' # 0x00dd -> LATIN CAPITAL LETTER U WITH BREVE + u'\u015c' # 0x00de -> LATIN CAPITAL LETTER S WITH CIRCUMFLEX + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0x00e0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\ufffe' + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u010b' # 0x00e5 -> LATIN SMALL LETTER C WITH DOT ABOVE + u'\u0109' # 0x00e6 -> LATIN SMALL LETTER C WITH CIRCUMFLEX + u'\xe7' # 0x00e7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0x00e8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x00ea -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0x00ed -> LATIN SMALL 
LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x00ef -> LATIN SMALL LETTER I WITH DIAERESIS + u'\ufffe' + u'\xf1' # 0x00f1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0x00f2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u0121' # 0x00f5 -> LATIN SMALL LETTER G WITH DOT ABOVE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x00f7 -> DIVISION SIGN + u'\u011d' # 0x00f8 -> LATIN SMALL LETTER G WITH CIRCUMFLEX + u'\xf9' # 0x00f9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0x00fb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u016d' # 0x00fd -> LATIN SMALL LETTER U WITH BREVE + u'\u015d' # 0x00fe -> LATIN SMALL LETTER S WITH CIRCUMFLEX + u'\u02d9' # 0x00ff -> DOT ABOVE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 
0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # 
LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # <control> + 0x0081: 0x0081, # <control> + 0x0082: 0x0082, # <control> + 0x0083: 0x0083, # <control> + 0x0084: 0x0084, # <control> + 0x0085: 0x0085, # <control> + 0x0086: 0x0086, # <control> + 0x0087: 
0x0087, # <control> + 0x0088: 0x0088, # <control> + 0x0089: 0x0089, # <control> + 0x008a: 0x008a, # <control> + 0x008b: 0x008b, # <control> + 0x008c: 0x008c, # <control> + 0x008d: 0x008d, # <control> + 0x008e: 0x008e, # <control> + 0x008f: 0x008f, # <control> + 0x0090: 0x0090, # <control> + 0x0091: 0x0091, # <control> + 0x0092: 0x0092, # <control> + 0x0093: 0x0093, # <control> + 0x0094: 0x0094, # <control> + 0x0095: 0x0095, # <control> + 0x0096: 0x0096, # <control> + 0x0097: 0x0097, # <control> + 0x0098: 0x0098, # <control> + 0x0099: 0x0099, # <control> + 0x009a: 0x009a, # <control> + 0x009b: 0x009b, # <control> + 0x009c: 0x009c, # <control> + 0x009d: 0x009d, # <control> + 0x009e: 0x009e, # <control> + 0x009f: 0x009f, # <control> + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b8: 0x00b8, # CEDILLA + 0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00c0: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c7: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d1: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 
0x00d2: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00d9: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e7: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00f9: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0108: 0x00c6, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX + 0x0109: 0x00e6, # LATIN SMALL LETTER C WITH CIRCUMFLEX + 0x010a: 0x00c5, # LATIN CAPITAL LETTER C WITH DOT ABOVE + 0x010b: 0x00e5, # LATIN SMALL LETTER C WITH DOT ABOVE + 0x011c: 
0x00d8, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX + 0x011d: 0x00f8, # LATIN SMALL LETTER G WITH CIRCUMFLEX + 0x011e: 0x00ab, # LATIN CAPITAL LETTER G WITH BREVE + 0x011f: 0x00bb, # LATIN SMALL LETTER G WITH BREVE + 0x0120: 0x00d5, # LATIN CAPITAL LETTER G WITH DOT ABOVE + 0x0121: 0x00f5, # LATIN SMALL LETTER G WITH DOT ABOVE + 0x0124: 0x00a6, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX + 0x0125: 0x00b6, # LATIN SMALL LETTER H WITH CIRCUMFLEX + 0x0126: 0x00a1, # LATIN CAPITAL LETTER H WITH STROKE + 0x0127: 0x00b1, # LATIN SMALL LETTER H WITH STROKE + 0x0130: 0x00a9, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0x00b9, # LATIN SMALL LETTER DOTLESS I + 0x0134: 0x00ac, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX + 0x0135: 0x00bc, # LATIN SMALL LETTER J WITH CIRCUMFLEX + 0x015c: 0x00de, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX + 0x015d: 0x00fe, # LATIN SMALL LETTER S WITH CIRCUMFLEX + 0x015e: 0x00aa, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015f: 0x00ba, # LATIN SMALL LETTER S WITH CEDILLA + 0x016c: 0x00dd, # LATIN CAPITAL LETTER U WITH BREVE + 0x016d: 0x00fd, # LATIN SMALL LETTER U WITH BREVE + 0x017b: 0x00af, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017c: 0x00bf, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x02d8: 0x00a2, # BREVE + 0x02d9: 0x00ff, # DOT ABOVE +}
\ No newline at end of file diff --git a/Lib/encodings/iso8859_4.py b/Lib/encodings/iso8859_4.py index 3f84965..19ad6df 100644 --- a/Lib/encodings/iso8859_4.py +++ b/Lib/encodings/iso8859_4.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-4.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-4.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,58 +32,576 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00a2: 0x0138, # LATIN SMALL LETTER KRA - 0x00a3: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x00a5: 0x0128, # LATIN CAPITAL LETTER I WITH TILDE - 0x00a6: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x00a9: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00aa: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON - 0x00ab: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x00ac: 0x0166, # LATIN CAPITAL LETTER T WITH STROKE - 0x00ae: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00b1: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00b2: 0x02db, # OGONEK - 0x00b3: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA - 0x00b5: 0x0129, # LATIN SMALL LETTER I WITH TILDE - 0x00b6: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA - 0x00b7: 0x02c7, # CARON - 0x00b9: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00ba: 0x0113, # LATIN SMALL LETTER E WITH MACRON - 0x00bb: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA - 0x00bc: 0x0167, # LATIN SMALL LETTER T WITH STROKE - 0x00bd: 0x014a, # LATIN CAPITAL LETTER ENG - 0x00be: 0x017e, # LATIN 
SMALL LETTER Z WITH CARON - 0x00bf: 0x014b, # LATIN SMALL LETTER ENG - 0x00c0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON - 0x00c7: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK - 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00cc: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x00cf: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON - 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE - 0x00d1: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x00d2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON - 0x00d3: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x00d9: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK - 0x00dd: 0x0168, # LATIN CAPITAL LETTER U WITH TILDE - 0x00de: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON - 0x00e0: 0x0101, # LATIN SMALL LETTER A WITH MACRON - 0x00e7: 0x012f, # LATIN SMALL LETTER I WITH OGONEK - 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00ec: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x00ef: 0x012b, # LATIN SMALL LETTER I WITH MACRON - 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE - 0x00f1: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA - 0x00f2: 0x014d, # LATIN SMALL LETTER O WITH MACRON - 0x00f3: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA - 0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK - 0x00fd: 0x0169, # LATIN SMALL LETTER U WITH TILDE - 0x00fe: 0x016b, # LATIN SMALL LETTER U WITH MACRON - 0x00ff: 0x02d9, # DOT ABOVE + 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00a2: 0x0138, # LATIN SMALL LETTER KRA + 0x00a3: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x00a5: 0x0128, # LATIN CAPITAL LETTER I WITH TILDE + 0x00a6: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x00a9: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00aa: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON + 0x00ab: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x00ac: 0x0166, # LATIN CAPITAL LETTER T WITH STROKE + 
0x00ae: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00b1: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00b2: 0x02db, # OGONEK + 0x00b3: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA + 0x00b5: 0x0129, # LATIN SMALL LETTER I WITH TILDE + 0x00b6: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA + 0x00b7: 0x02c7, # CARON + 0x00b9: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00ba: 0x0113, # LATIN SMALL LETTER E WITH MACRON + 0x00bb: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA + 0x00bc: 0x0167, # LATIN SMALL LETTER T WITH STROKE + 0x00bd: 0x014a, # LATIN CAPITAL LETTER ENG + 0x00be: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00bf: 0x014b, # LATIN SMALL LETTER ENG + 0x00c0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON + 0x00c7: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK + 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00cc: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x00cf: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON + 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE + 0x00d1: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x00d2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON + 0x00d3: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x00d9: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK + 0x00dd: 0x0168, # LATIN CAPITAL LETTER U WITH TILDE + 0x00de: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON + 0x00e0: 0x0101, # LATIN SMALL LETTER A WITH MACRON + 0x00e7: 0x012f, # LATIN SMALL LETTER I WITH OGONEK + 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00ec: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x00ef: 0x012b, # LATIN SMALL LETTER I WITH MACRON + 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE + 0x00f1: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA + 0x00f2: 0x014d, # LATIN SMALL LETTER O WITH MACRON + 0x00f3: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA + 0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK + 0x00fd: 0x0169, # 
LATIN SMALL LETTER U WITH TILDE + 0x00fe: 0x016b, # LATIN SMALL LETTER U WITH MACRON + 0x00ff: 0x02d9, # DOT ABOVE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> <control> + u'\x81' # 0x0081 -> <control> + u'\x82' # 0x0082 -> <control> + u'\x83' # 0x0083 -> <control> + u'\x84' # 0x0084 -> <control> + u'\x85' # 0x0085 -> <control> + u'\x86' # 0x0086 -> <control> + u'\x87' # 0x0087 -> <control> + u'\x88' # 0x0088 -> <control> + u'\x89' # 0x0089 -> <control> + u'\x8a' # 0x008a -> <control> + u'\x8b' # 0x008b -> <control> + u'\x8c' # 0x008c -> <control> + u'\x8d' # 0x008d -> <control> + u'\x8e' # 0x008e -> <control> + u'\x8f' # 0x008f -> <control> + u'\x90' # 0x0090 -> <control> + u'\x91' # 0x0091 -> <control> + u'\x92' # 0x0092 -> <control> + u'\x93' # 0x0093 -> <control> + u'\x94' # 0x0094 -> <control> + u'\x95' # 0x0095 -> <control> + u'\x96' # 0x0096 -> <control> + u'\x97' # 0x0097 -> <control> + u'\x98' # 0x0098 -> <control> + u'\x99' # 0x0099 -> <control> + u'\x9a' # 0x009a -> <control> + u'\x9b' # 0x009b -> <control> + u'\x9c' # 0x009c -> 
<control> + u'\x9d' # 0x009d -> <control> + u'\x9e' # 0x009e -> <control> + u'\x9f' # 0x009f -> <control> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u0104' # 0x00a1 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u0138' # 0x00a2 -> LATIN SMALL LETTER KRA + u'\u0156' # 0x00a3 -> LATIN CAPITAL LETTER R WITH CEDILLA + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\u0128' # 0x00a5 -> LATIN CAPITAL LETTER I WITH TILDE + u'\u013b' # 0x00a6 -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\u0160' # 0x00a9 -> LATIN CAPITAL LETTER S WITH CARON + u'\u0112' # 0x00aa -> LATIN CAPITAL LETTER E WITH MACRON + u'\u0122' # 0x00ab -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\u0166' # 0x00ac -> LATIN CAPITAL LETTER T WITH STROKE + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\u017d' # 0x00ae -> LATIN CAPITAL LETTER Z WITH CARON + u'\xaf' # 0x00af -> MACRON + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\u0105' # 0x00b1 -> LATIN SMALL LETTER A WITH OGONEK + u'\u02db' # 0x00b2 -> OGONEK + u'\u0157' # 0x00b3 -> LATIN SMALL LETTER R WITH CEDILLA + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\u0129' # 0x00b5 -> LATIN SMALL LETTER I WITH TILDE + u'\u013c' # 0x00b6 -> LATIN SMALL LETTER L WITH CEDILLA + u'\u02c7' # 0x00b7 -> CARON + u'\xb8' # 0x00b8 -> CEDILLA + u'\u0161' # 0x00b9 -> LATIN SMALL LETTER S WITH CARON + u'\u0113' # 0x00ba -> LATIN SMALL LETTER E WITH MACRON + u'\u0123' # 0x00bb -> LATIN SMALL LETTER G WITH CEDILLA + u'\u0167' # 0x00bc -> LATIN SMALL LETTER T WITH STROKE + u'\u014a' # 0x00bd -> LATIN CAPITAL LETTER ENG + u'\u017e' # 0x00be -> LATIN SMALL LETTER Z WITH CARON + u'\u014b' # 0x00bf -> LATIN SMALL LETTER ENG + u'\u0100' # 0x00c0 -> LATIN CAPITAL LETTER A WITH MACRON + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0x00c3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x00c5 -> LATIN CAPITAL LETTER A WITH 
RING ABOVE + u'\xc6' # 0x00c6 -> LATIN CAPITAL LETTER AE + u'\u012e' # 0x00c7 -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u010c' # 0x00c8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0118' # 0x00ca -> LATIN CAPITAL LETTER E WITH OGONEK + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u0116' # 0x00cc -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\u012a' # 0x00cf -> LATIN CAPITAL LETTER I WITH MACRON + u'\u0110' # 0x00d0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u0145' # 0x00d1 -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\u014c' # 0x00d2 -> LATIN CAPITAL LETTER O WITH MACRON + u'\u0136' # 0x00d3 -> LATIN CAPITAL LETTER K WITH CEDILLA + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0x00d5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\xd8' # 0x00d8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\u0172' # 0x00d9 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00db -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0168' # 0x00dd -> LATIN CAPITAL LETTER U WITH TILDE + u'\u016a' # 0x00de -> LATIN CAPITAL LETTER U WITH MACRON + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S + u'\u0101' # 0x00e0 -> LATIN SMALL LETTER A WITH MACRON + u'\xe1' # 0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0x00e3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0x00e5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0x00e6 -> LATIN SMALL LETTER AE + u'\u012f' # 0x00e7 -> LATIN SMALL LETTER I WITH OGONEK + u'\u010d' # 0x00e8 -> LATIN SMALL LETTER C WITH 
CARON + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u0119' # 0x00ea -> LATIN SMALL LETTER E WITH OGONEK + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u0117' # 0x00ec -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u012b' # 0x00ef -> LATIN SMALL LETTER I WITH MACRON + u'\u0111' # 0x00f0 -> LATIN SMALL LETTER D WITH STROKE + u'\u0146' # 0x00f1 -> LATIN SMALL LETTER N WITH CEDILLA + u'\u014d' # 0x00f2 -> LATIN SMALL LETTER O WITH MACRON + u'\u0137' # 0x00f3 -> LATIN SMALL LETTER K WITH CEDILLA + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0x00f5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x00f7 -> DIVISION SIGN + u'\xf8' # 0x00f8 -> LATIN SMALL LETTER O WITH STROKE + u'\u0173' # 0x00f9 -> LATIN SMALL LETTER U WITH OGONEK + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0x00fb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u0169' # 0x00fd -> LATIN SMALL LETTER U WITH TILDE + u'\u016b' # 0x00fe -> LATIN SMALL LETTER U WITH MACRON + u'\u02d9' # 0x00ff -> DOT ABOVE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL 
TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 
0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 
0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # <control> + 0x0081: 0x0081, # <control> + 0x0082: 0x0082, # <control> + 0x0083: 0x0083, # <control> + 0x0084: 0x0084, # <control> + 0x0085: 0x0085, # <control> + 0x0086: 0x0086, # <control> + 0x0087: 0x0087, # <control> + 0x0088: 0x0088, # <control> + 0x0089: 0x0089, # <control> + 0x008a: 0x008a, # <control> + 0x008b: 0x008b, # <control> + 0x008c: 0x008c, # <control> + 0x008d: 0x008d, # <control> + 0x008e: 0x008e, # <control> + 0x008f: 0x008f, # <control> + 0x0090: 0x0090, # <control> + 0x0091: 0x0091, # <control> + 0x0092: 0x0092, # <control> + 0x0093: 0x0093, # <control> + 0x0094: 0x0094, # <control> + 0x0095: 0x0095, # <control> + 0x0096: 0x0096, # <control> + 0x0097: 0x0097, # <control> + 0x0098: 0x0098, # <control> + 0x0099: 0x0099, # <control> + 0x009a: 0x009a, # <control> + 0x009b: 0x009b, # <control> + 0x009c: 0x009c, # <control> + 0x009d: 0x009d, # <control> + 0x009e: 0x009e, # <control> + 0x009f: 0x009f, # <control> + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00af: 0x00af, # MACRON + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b8: 0x00b8, # CEDILLA + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d4: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00d5, # LATIN CAPITAL LETTER O WITH 
TILDE + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00d8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00e6, # LATIN SMALL LETTER AE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00f8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00fa: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0100: 0x00c0, # LATIN CAPITAL LETTER A WITH MACRON + 0x0101: 0x00e0, # LATIN SMALL LETTER A WITH MACRON + 0x0104: 0x00a1, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0x00b1, # LATIN SMALL LETTER A WITH OGONEK + 0x010c: 0x00c8, # LATIN CAPITAL LETTER C WITH CARON + 0x010d: 0x00e8, # LATIN SMALL LETTER C WITH CARON + 0x0110: 0x00d0, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0x00f0, # LATIN SMALL LETTER D WITH STROKE + 0x0112: 0x00aa, # LATIN CAPITAL LETTER E WITH MACRON + 0x0113: 0x00ba, # LATIN SMALL LETTER E WITH MACRON + 0x0116: 0x00cc, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x0117: 0x00ec, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x0118: 0x00ca, # 
LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0x00ea, # LATIN SMALL LETTER E WITH OGONEK + 0x0122: 0x00ab, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0123: 0x00bb, # LATIN SMALL LETTER G WITH CEDILLA + 0x0128: 0x00a5, # LATIN CAPITAL LETTER I WITH TILDE + 0x0129: 0x00b5, # LATIN SMALL LETTER I WITH TILDE + 0x012a: 0x00cf, # LATIN CAPITAL LETTER I WITH MACRON + 0x012b: 0x00ef, # LATIN SMALL LETTER I WITH MACRON + 0x012e: 0x00c7, # LATIN CAPITAL LETTER I WITH OGONEK + 0x012f: 0x00e7, # LATIN SMALL LETTER I WITH OGONEK + 0x0136: 0x00d3, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x0137: 0x00f3, # LATIN SMALL LETTER K WITH CEDILLA + 0x0138: 0x00a2, # LATIN SMALL LETTER KRA + 0x013b: 0x00a6, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x013c: 0x00b6, # LATIN SMALL LETTER L WITH CEDILLA + 0x0145: 0x00d1, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x0146: 0x00f1, # LATIN SMALL LETTER N WITH CEDILLA + 0x014a: 0x00bd, # LATIN CAPITAL LETTER ENG + 0x014b: 0x00bf, # LATIN SMALL LETTER ENG + 0x014c: 0x00d2, # LATIN CAPITAL LETTER O WITH MACRON + 0x014d: 0x00f2, # LATIN SMALL LETTER O WITH MACRON + 0x0156: 0x00a3, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x0157: 0x00b3, # LATIN SMALL LETTER R WITH CEDILLA + 0x0160: 0x00a9, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x00b9, # LATIN SMALL LETTER S WITH CARON + 0x0166: 0x00ac, # LATIN CAPITAL LETTER T WITH STROKE + 0x0167: 0x00bc, # LATIN SMALL LETTER T WITH STROKE + 0x0168: 0x00dd, # LATIN CAPITAL LETTER U WITH TILDE + 0x0169: 0x00fd, # LATIN SMALL LETTER U WITH TILDE + 0x016a: 0x00de, # LATIN CAPITAL LETTER U WITH MACRON + 0x016b: 0x00fe, # LATIN SMALL LETTER U WITH MACRON + 0x0172: 0x00d9, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0x00f9, # LATIN SMALL LETTER U WITH OGONEK + 0x017d: 0x00ae, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x00be, # LATIN SMALL LETTER Z WITH CARON + 0x02c7: 0x00b7, # CARON + 0x02d9: 0x00ff, # DOT ABOVE + 0x02db: 0x00b2, # OGONEK +}
\ No newline at end of file diff --git a/Lib/encodings/iso8859_5.py b/Lib/encodings/iso8859_5.py index ac9a842..eca4e72 100644 --- a/Lib/encodings/iso8859_5.py +++ b/Lib/encodings/iso8859_5.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-5.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-5.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,102 +32,620 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x00a2: 0x0402, # CYRILLIC CAPITAL LETTER DJE - 0x00a3: 0x0403, # CYRILLIC CAPITAL LETTER GJE - 0x00a4: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x00a5: 0x0405, # CYRILLIC CAPITAL LETTER DZE - 0x00a6: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x00a7: 0x0407, # CYRILLIC CAPITAL LETTER YI - 0x00a8: 0x0408, # CYRILLIC CAPITAL LETTER JE - 0x00a9: 0x0409, # CYRILLIC CAPITAL LETTER LJE - 0x00aa: 0x040a, # CYRILLIC CAPITAL LETTER NJE - 0x00ab: 0x040b, # CYRILLIC CAPITAL LETTER TSHE - 0x00ac: 0x040c, # CYRILLIC CAPITAL LETTER KJE - 0x00ae: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U - 0x00af: 0x040f, # CYRILLIC CAPITAL LETTER DZHE - 0x00b0: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x00b1: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x00b2: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x00b3: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x00b4: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x00b5: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x00b6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x00b7: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 
0x00b8: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x00b9: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x00ba: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x00bb: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x00bc: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x00bd: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x00be: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x00bf: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x00c0: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x00c1: 0x0421, # CYRILLIC CAPITAL LETTER ES - 0x00c2: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x00c3: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x00c4: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x00c5: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x00c6: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x00c7: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x00c8: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x00c9: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x00ca: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x00cb: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x00cc: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x00cd: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x00ce: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x00cf: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00d0: 0x0430, # CYRILLIC SMALL LETTER A - 0x00d1: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00d2: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00d3: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00d4: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00d5: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00d6: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00d7: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00d8: 0x0438, # CYRILLIC SMALL LETTER I - 0x00d9: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00da: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00db: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00dc: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00dd: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00de: 0x043e, # CYRILLIC SMALL LETTER O - 0x00df: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00e2: 0x0442, # CYRILLIC SMALL 
LETTER TE - 0x00e3: 0x0443, # CYRILLIC SMALL LETTER U - 0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF - 0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00ed: 0x044d, # CYRILLIC SMALL LETTER E - 0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU - 0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA - 0x00f0: 0x2116, # NUMERO SIGN - 0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO - 0x00f2: 0x0452, # CYRILLIC SMALL LETTER DJE - 0x00f3: 0x0453, # CYRILLIC SMALL LETTER GJE - 0x00f4: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x00f5: 0x0455, # CYRILLIC SMALL LETTER DZE - 0x00f6: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x00f7: 0x0457, # CYRILLIC SMALL LETTER YI - 0x00f8: 0x0458, # CYRILLIC SMALL LETTER JE - 0x00f9: 0x0459, # CYRILLIC SMALL LETTER LJE - 0x00fa: 0x045a, # CYRILLIC SMALL LETTER NJE - 0x00fb: 0x045b, # CYRILLIC SMALL LETTER TSHE - 0x00fc: 0x045c, # CYRILLIC SMALL LETTER KJE - 0x00fd: 0x00a7, # SECTION SIGN - 0x00fe: 0x045e, # CYRILLIC SMALL LETTER SHORT U - 0x00ff: 0x045f, # CYRILLIC SMALL LETTER DZHE + 0x00a1: 0x0401, # CYRILLIC CAPITAL LETTER IO + 0x00a2: 0x0402, # CYRILLIC CAPITAL LETTER DJE + 0x00a3: 0x0403, # CYRILLIC CAPITAL LETTER GJE + 0x00a4: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x00a5: 0x0405, # CYRILLIC CAPITAL LETTER DZE + 0x00a6: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x00a7: 0x0407, # CYRILLIC CAPITAL LETTER YI + 0x00a8: 0x0408, # CYRILLIC CAPITAL LETTER JE + 0x00a9: 0x0409, # CYRILLIC CAPITAL LETTER LJE + 0x00aa: 0x040a, # CYRILLIC CAPITAL LETTER NJE + 0x00ab: 0x040b, # CYRILLIC CAPITAL LETTER TSHE + 0x00ac: 0x040c, # CYRILLIC CAPITAL LETTER KJE + 0x00ae: 0x040e, # CYRILLIC CAPITAL 
LETTER SHORT U + 0x00af: 0x040f, # CYRILLIC CAPITAL LETTER DZHE + 0x00b0: 0x0410, # CYRILLIC CAPITAL LETTER A + 0x00b1: 0x0411, # CYRILLIC CAPITAL LETTER BE + 0x00b2: 0x0412, # CYRILLIC CAPITAL LETTER VE + 0x00b3: 0x0413, # CYRILLIC CAPITAL LETTER GHE + 0x00b4: 0x0414, # CYRILLIC CAPITAL LETTER DE + 0x00b5: 0x0415, # CYRILLIC CAPITAL LETTER IE + 0x00b6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE + 0x00b7: 0x0417, # CYRILLIC CAPITAL LETTER ZE + 0x00b8: 0x0418, # CYRILLIC CAPITAL LETTER I + 0x00b9: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I + 0x00ba: 0x041a, # CYRILLIC CAPITAL LETTER KA + 0x00bb: 0x041b, # CYRILLIC CAPITAL LETTER EL + 0x00bc: 0x041c, # CYRILLIC CAPITAL LETTER EM + 0x00bd: 0x041d, # CYRILLIC CAPITAL LETTER EN + 0x00be: 0x041e, # CYRILLIC CAPITAL LETTER O + 0x00bf: 0x041f, # CYRILLIC CAPITAL LETTER PE + 0x00c0: 0x0420, # CYRILLIC CAPITAL LETTER ER + 0x00c1: 0x0421, # CYRILLIC CAPITAL LETTER ES + 0x00c2: 0x0422, # CYRILLIC CAPITAL LETTER TE + 0x00c3: 0x0423, # CYRILLIC CAPITAL LETTER U + 0x00c4: 0x0424, # CYRILLIC CAPITAL LETTER EF + 0x00c5: 0x0425, # CYRILLIC CAPITAL LETTER HA + 0x00c6: 0x0426, # CYRILLIC CAPITAL LETTER TSE + 0x00c7: 0x0427, # CYRILLIC CAPITAL LETTER CHE + 0x00c8: 0x0428, # CYRILLIC CAPITAL LETTER SHA + 0x00c9: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA + 0x00ca: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x00cb: 0x042b, # CYRILLIC CAPITAL LETTER YERU + 0x00cc: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x00cd: 0x042d, # CYRILLIC CAPITAL LETTER E + 0x00ce: 0x042e, # CYRILLIC CAPITAL LETTER YU + 0x00cf: 0x042f, # CYRILLIC CAPITAL LETTER YA + 0x00d0: 0x0430, # CYRILLIC SMALL LETTER A + 0x00d1: 0x0431, # CYRILLIC SMALL LETTER BE + 0x00d2: 0x0432, # CYRILLIC SMALL LETTER VE + 0x00d3: 0x0433, # CYRILLIC SMALL LETTER GHE + 0x00d4: 0x0434, # CYRILLIC SMALL LETTER DE + 0x00d5: 0x0435, # CYRILLIC SMALL LETTER IE + 0x00d6: 0x0436, # CYRILLIC SMALL LETTER ZHE + 0x00d7: 0x0437, # CYRILLIC SMALL LETTER ZE + 0x00d8: 0x0438, # CYRILLIC SMALL LETTER I + 
0x00d9: 0x0439, # CYRILLIC SMALL LETTER SHORT I + 0x00da: 0x043a, # CYRILLIC SMALL LETTER KA + 0x00db: 0x043b, # CYRILLIC SMALL LETTER EL + 0x00dc: 0x043c, # CYRILLIC SMALL LETTER EM + 0x00dd: 0x043d, # CYRILLIC SMALL LETTER EN + 0x00de: 0x043e, # CYRILLIC SMALL LETTER O + 0x00df: 0x043f, # CYRILLIC SMALL LETTER PE + 0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER + 0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES + 0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE + 0x00e3: 0x0443, # CYRILLIC SMALL LETTER U + 0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF + 0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA + 0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE + 0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE + 0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA + 0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA + 0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN + 0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU + 0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN + 0x00ed: 0x044d, # CYRILLIC SMALL LETTER E + 0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU + 0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA + 0x00f0: 0x2116, # NUMERO SIGN + 0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO + 0x00f2: 0x0452, # CYRILLIC SMALL LETTER DJE + 0x00f3: 0x0453, # CYRILLIC SMALL LETTER GJE + 0x00f4: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x00f5: 0x0455, # CYRILLIC SMALL LETTER DZE + 0x00f6: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x00f7: 0x0457, # CYRILLIC SMALL LETTER YI + 0x00f8: 0x0458, # CYRILLIC SMALL LETTER JE + 0x00f9: 0x0459, # CYRILLIC SMALL LETTER LJE + 0x00fa: 0x045a, # CYRILLIC SMALL LETTER NJE + 0x00fb: 0x045b, # CYRILLIC SMALL LETTER TSHE + 0x00fc: 0x045c, # CYRILLIC SMALL LETTER KJE + 0x00fd: 0x00a7, # SECTION SIGN + 0x00fe: 0x045e, # CYRILLIC SMALL LETTER SHORT U + 0x00ff: 0x045f, # CYRILLIC SMALL LETTER DZHE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 
-> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> <control> + u'\x81' # 0x0081 -> <control> + u'\x82' # 0x0082 -> <control> + u'\x83' # 0x0083 -> <control> + u'\x84' # 0x0084 -> <control> + u'\x85' # 0x0085 -> <control> + u'\x86' # 0x0086 -> <control> + u'\x87' # 0x0087 -> <control> + u'\x88' # 0x0088 -> <control> + u'\x89' # 0x0089 -> <control> + u'\x8a' # 0x008a -> <control> + u'\x8b' # 0x008b -> <control> + u'\x8c' # 0x008c -> <control> + u'\x8d' # 0x008d -> <control> + u'\x8e' # 0x008e -> <control> + u'\x8f' # 0x008f -> <control> + u'\x90' # 0x0090 -> <control> + u'\x91' # 0x0091 -> <control> + u'\x92' # 0x0092 -> <control> + u'\x93' # 0x0093 -> <control> + u'\x94' # 0x0094 -> <control> + u'\x95' # 0x0095 -> <control> + u'\x96' # 0x0096 -> <control> + u'\x97' # 0x0097 -> <control> + u'\x98' # 0x0098 -> <control> + u'\x99' # 0x0099 -> <control> + u'\x9a' # 0x009a -> <control> + u'\x9b' # 0x009b -> <control> + u'\x9c' # 0x009c -> <control> + u'\x9d' # 0x009d -> <control> + u'\x9e' # 0x009e -> <control> + u'\x9f' # 0x009f -> <control> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u0401' # 0x00a1 -> CYRILLIC CAPITAL LETTER IO + u'\u0402' # 0x00a2 -> CYRILLIC CAPITAL LETTER DJE + u'\u0403' # 0x00a3 -> CYRILLIC CAPITAL LETTER GJE + u'\u0404' # 0x00a4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\u0405' # 0x00a5 -> CYRILLIC CAPITAL LETTER DZE + u'\u0406' # 0x00a6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0407' # 0x00a7 -> 
CYRILLIC CAPITAL LETTER YI + u'\u0408' # 0x00a8 -> CYRILLIC CAPITAL LETTER JE + u'\u0409' # 0x00a9 -> CYRILLIC CAPITAL LETTER LJE + u'\u040a' # 0x00aa -> CYRILLIC CAPITAL LETTER NJE + u'\u040b' # 0x00ab -> CYRILLIC CAPITAL LETTER TSHE + u'\u040c' # 0x00ac -> CYRILLIC CAPITAL LETTER KJE + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\u040e' # 0x00ae -> CYRILLIC CAPITAL LETTER SHORT U + u'\u040f' # 0x00af -> CYRILLIC CAPITAL LETTER DZHE + u'\u0410' # 0x00b0 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0x00b1 -> CYRILLIC CAPITAL LETTER BE + u'\u0412' # 0x00b2 -> CYRILLIC CAPITAL LETTER VE + u'\u0413' # 0x00b3 -> CYRILLIC CAPITAL LETTER GHE + u'\u0414' # 0x00b4 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0x00b5 -> CYRILLIC CAPITAL LETTER IE + u'\u0416' # 0x00b6 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0417' # 0x00b7 -> CYRILLIC CAPITAL LETTER ZE + u'\u0418' # 0x00b8 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0x00b9 -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0x00ba -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0x00bb -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0x00bc -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0x00bd -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0x00be -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0x00bf -> CYRILLIC CAPITAL LETTER PE + u'\u0420' # 0x00c0 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0x00c1 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0x00c2 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0x00c3 -> CYRILLIC CAPITAL LETTER U + u'\u0424' # 0x00c4 -> CYRILLIC CAPITAL LETTER EF + u'\u0425' # 0x00c5 -> CYRILLIC CAPITAL LETTER HA + u'\u0426' # 0x00c6 -> CYRILLIC CAPITAL LETTER TSE + u'\u0427' # 0x00c7 -> CYRILLIC CAPITAL LETTER CHE + u'\u0428' # 0x00c8 -> CYRILLIC CAPITAL LETTER SHA + u'\u0429' # 0x00c9 -> CYRILLIC CAPITAL LETTER SHCHA + u'\u042a' # 0x00ca -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\u042b' # 0x00cb -> CYRILLIC CAPITAL LETTER YERU + u'\u042c' # 0x00cc -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042d' # 0x00cd -> CYRILLIC CAPITAL LETTER E + u'\u042e' # 
0x00ce -> CYRILLIC CAPITAL LETTER YU + u'\u042f' # 0x00cf -> CYRILLIC CAPITAL LETTER YA + u'\u0430' # 0x00d0 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0x00d1 -> CYRILLIC SMALL LETTER BE + u'\u0432' # 0x00d2 -> CYRILLIC SMALL LETTER VE + u'\u0433' # 0x00d3 -> CYRILLIC SMALL LETTER GHE + u'\u0434' # 0x00d4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0x00d5 -> CYRILLIC SMALL LETTER IE + u'\u0436' # 0x00d6 -> CYRILLIC SMALL LETTER ZHE + u'\u0437' # 0x00d7 -> CYRILLIC SMALL LETTER ZE + u'\u0438' # 0x00d8 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0x00d9 -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0x00da -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0x00db -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0x00dc -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0x00dd -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0x00de -> CYRILLIC SMALL LETTER O + u'\u043f' # 0x00df -> CYRILLIC SMALL LETTER PE + u'\u0440' # 0x00e0 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0x00e1 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0x00e2 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0x00e3 -> CYRILLIC SMALL LETTER U + u'\u0444' # 0x00e4 -> CYRILLIC SMALL LETTER EF + u'\u0445' # 0x00e5 -> CYRILLIC SMALL LETTER HA + u'\u0446' # 0x00e6 -> CYRILLIC SMALL LETTER TSE + u'\u0447' # 0x00e7 -> CYRILLIC SMALL LETTER CHE + u'\u0448' # 0x00e8 -> CYRILLIC SMALL LETTER SHA + u'\u0449' # 0x00e9 -> CYRILLIC SMALL LETTER SHCHA + u'\u044a' # 0x00ea -> CYRILLIC SMALL LETTER HARD SIGN + u'\u044b' # 0x00eb -> CYRILLIC SMALL LETTER YERU + u'\u044c' # 0x00ec -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044d' # 0x00ed -> CYRILLIC SMALL LETTER E + u'\u044e' # 0x00ee -> CYRILLIC SMALL LETTER YU + u'\u044f' # 0x00ef -> CYRILLIC SMALL LETTER YA + u'\u2116' # 0x00f0 -> NUMERO SIGN + u'\u0451' # 0x00f1 -> CYRILLIC SMALL LETTER IO + u'\u0452' # 0x00f2 -> CYRILLIC SMALL LETTER DJE + u'\u0453' # 0x00f3 -> CYRILLIC SMALL LETTER GJE + u'\u0454' # 0x00f4 -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\u0455' # 0x00f5 -> CYRILLIC SMALL LETTER DZE + u'\u0456' # 0x00f6 -> 
CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0457' # 0x00f7 -> CYRILLIC SMALL LETTER YI + u'\u0458' # 0x00f8 -> CYRILLIC SMALL LETTER JE + u'\u0459' # 0x00f9 -> CYRILLIC SMALL LETTER LJE + u'\u045a' # 0x00fa -> CYRILLIC SMALL LETTER NJE + u'\u045b' # 0x00fb -> CYRILLIC SMALL LETTER TSHE + u'\u045c' # 0x00fc -> CYRILLIC SMALL LETTER KJE + u'\xa7' # 0x00fd -> SECTION SIGN + u'\u045e' # 0x00fe -> CYRILLIC SMALL LETTER SHORT U + u'\u045f' # 0x00ff -> CYRILLIC SMALL LETTER DZHE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # 
RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX 
ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # <control> + 0x0081: 0x0081, # <control> + 0x0082: 0x0082, # <control> + 0x0083: 0x0083, # <control> + 0x0084: 0x0084, # <control> + 0x0085: 0x0085, # <control> + 0x0086: 0x0086, # <control> + 0x0087: 0x0087, # <control> + 0x0088: 0x0088, # <control> + 0x0089: 0x0089, # <control> + 0x008a: 0x008a, # <control> + 0x008b: 0x008b, # <control> + 0x008c: 0x008c, # <control> + 0x008d: 0x008d, # <control> + 0x008e: 0x008e, # <control> + 0x008f: 0x008f, # <control> + 0x0090: 0x0090, # <control> + 0x0091: 0x0091, # <control> + 0x0092: 0x0092, # <control> + 0x0093: 0x0093, # <control> + 0x0094: 0x0094, # <control> + 0x0095: 0x0095, # <control> + 0x0096: 0x0096, # <control> + 
0x0097: 0x0097, # <control> + 0x0098: 0x0098, # <control> + 0x0099: 0x0099, # <control> + 0x009a: 0x009a, # <control> + 0x009b: 0x009b, # <control> + 0x009c: 0x009c, # <control> + 0x009d: 0x009d, # <control> + 0x009e: 0x009e, # <control> + 0x009f: 0x009f, # <control> + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a7: 0x00fd, # SECTION SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x0401: 0x00a1, # CYRILLIC CAPITAL LETTER IO + 0x0402: 0x00a2, # CYRILLIC CAPITAL LETTER DJE + 0x0403: 0x00a3, # CYRILLIC CAPITAL LETTER GJE + 0x0404: 0x00a4, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0405: 0x00a5, # CYRILLIC CAPITAL LETTER DZE + 0x0406: 0x00a6, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0407: 0x00a7, # CYRILLIC CAPITAL LETTER YI + 0x0408: 0x00a8, # CYRILLIC CAPITAL LETTER JE + 0x0409: 0x00a9, # CYRILLIC CAPITAL LETTER LJE + 0x040a: 0x00aa, # CYRILLIC CAPITAL LETTER NJE + 0x040b: 0x00ab, # CYRILLIC CAPITAL LETTER TSHE + 0x040c: 0x00ac, # CYRILLIC CAPITAL LETTER KJE + 0x040e: 0x00ae, # CYRILLIC CAPITAL LETTER SHORT U + 0x040f: 0x00af, # CYRILLIC CAPITAL LETTER DZHE + 0x0410: 0x00b0, # CYRILLIC CAPITAL LETTER A + 0x0411: 0x00b1, # CYRILLIC CAPITAL LETTER BE + 0x0412: 0x00b2, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0x00b3, # CYRILLIC CAPITAL LETTER GHE + 0x0414: 0x00b4, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0x00b5, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0x00b6, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0x00b7, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0x00b8, # CYRILLIC CAPITAL LETTER I + 0x0419: 0x00b9, # CYRILLIC CAPITAL LETTER SHORT I + 0x041a: 0x00ba, # CYRILLIC CAPITAL LETTER KA + 0x041b: 0x00bb, # CYRILLIC CAPITAL LETTER EL + 0x041c: 0x00bc, # CYRILLIC CAPITAL LETTER EM + 0x041d: 0x00bd, # CYRILLIC CAPITAL LETTER EN + 0x041e: 0x00be, # CYRILLIC CAPITAL LETTER O + 0x041f: 0x00bf, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0x00c0, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0x00c1, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0x00c2, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0x00c3, # CYRILLIC 
CAPITAL LETTER U + 0x0424: 0x00c4, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0x00c5, # CYRILLIC CAPITAL LETTER HA + 0x0426: 0x00c6, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0x00c7, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0x00c8, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0x00c9, # CYRILLIC CAPITAL LETTER SHCHA + 0x042a: 0x00ca, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042b: 0x00cb, # CYRILLIC CAPITAL LETTER YERU + 0x042c: 0x00cc, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042d: 0x00cd, # CYRILLIC CAPITAL LETTER E + 0x042e: 0x00ce, # CYRILLIC CAPITAL LETTER YU + 0x042f: 0x00cf, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0x00d0, # CYRILLIC SMALL LETTER A + 0x0431: 0x00d1, # CYRILLIC SMALL LETTER BE + 0x0432: 0x00d2, # CYRILLIC SMALL LETTER VE + 0x0433: 0x00d3, # CYRILLIC SMALL LETTER GHE + 0x0434: 0x00d4, # CYRILLIC SMALL LETTER DE + 0x0435: 0x00d5, # CYRILLIC SMALL LETTER IE + 0x0436: 0x00d6, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0x00d7, # CYRILLIC SMALL LETTER ZE + 0x0438: 0x00d8, # CYRILLIC SMALL LETTER I + 0x0439: 0x00d9, # CYRILLIC SMALL LETTER SHORT I + 0x043a: 0x00da, # CYRILLIC SMALL LETTER KA + 0x043b: 0x00db, # CYRILLIC SMALL LETTER EL + 0x043c: 0x00dc, # CYRILLIC SMALL LETTER EM + 0x043d: 0x00dd, # CYRILLIC SMALL LETTER EN + 0x043e: 0x00de, # CYRILLIC SMALL LETTER O + 0x043f: 0x00df, # CYRILLIC SMALL LETTER PE + 0x0440: 0x00e0, # CYRILLIC SMALL LETTER ER + 0x0441: 0x00e1, # CYRILLIC SMALL LETTER ES + 0x0442: 0x00e2, # CYRILLIC SMALL LETTER TE + 0x0443: 0x00e3, # CYRILLIC SMALL LETTER U + 0x0444: 0x00e4, # CYRILLIC SMALL LETTER EF + 0x0445: 0x00e5, # CYRILLIC SMALL LETTER HA + 0x0446: 0x00e6, # CYRILLIC SMALL LETTER TSE + 0x0447: 0x00e7, # CYRILLIC SMALL LETTER CHE + 0x0448: 0x00e8, # CYRILLIC SMALL LETTER SHA + 0x0449: 0x00e9, # CYRILLIC SMALL LETTER SHCHA + 0x044a: 0x00ea, # CYRILLIC SMALL LETTER HARD SIGN + 0x044b: 0x00eb, # CYRILLIC SMALL LETTER YERU + 0x044c: 0x00ec, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044d: 0x00ed, # CYRILLIC SMALL LETTER E + 0x044e: 0x00ee, # 
CYRILLIC SMALL LETTER YU + 0x044f: 0x00ef, # CYRILLIC SMALL LETTER YA + 0x0451: 0x00f1, # CYRILLIC SMALL LETTER IO + 0x0452: 0x00f2, # CYRILLIC SMALL LETTER DJE + 0x0453: 0x00f3, # CYRILLIC SMALL LETTER GJE + 0x0454: 0x00f4, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0455: 0x00f5, # CYRILLIC SMALL LETTER DZE + 0x0456: 0x00f6, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0457: 0x00f7, # CYRILLIC SMALL LETTER YI + 0x0458: 0x00f8, # CYRILLIC SMALL LETTER JE + 0x0459: 0x00f9, # CYRILLIC SMALL LETTER LJE + 0x045a: 0x00fa, # CYRILLIC SMALL LETTER NJE + 0x045b: 0x00fb, # CYRILLIC SMALL LETTER TSHE + 0x045c: 0x00fc, # CYRILLIC SMALL LETTER KJE + 0x045e: 0x00fe, # CYRILLIC SMALL LETTER SHORT U + 0x045f: 0x00ff, # CYRILLIC SMALL LETTER DZHE + 0x2116: 0x00f0, # NUMERO SIGN +}
\ No newline at end of file diff --git a/Lib/encodings/iso8859_6.py b/Lib/encodings/iso8859_6.py index 3f5ab56..254dea3 100644 --- a/Lib/encodings/iso8859_6.py +++ b/Lib/encodings/iso8859_6.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-6.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-6.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,101 +32,574 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: None, - 0x00a2: None, - 0x00a3: None, - 0x00a5: None, - 0x00a6: None, - 0x00a7: None, - 0x00a8: None, - 0x00a9: None, - 0x00aa: None, - 0x00ab: None, - 0x00ac: 0x060c, # ARABIC COMMA - 0x00ae: None, - 0x00af: None, - 0x00b0: None, - 0x00b1: None, - 0x00b2: None, - 0x00b3: None, - 0x00b4: None, - 0x00b5: None, - 0x00b6: None, - 0x00b7: None, - 0x00b8: None, - 0x00b9: None, - 0x00ba: None, - 0x00bb: 0x061b, # ARABIC SEMICOLON - 0x00bc: None, - 0x00bd: None, - 0x00be: None, - 0x00bf: 0x061f, # ARABIC QUESTION MARK - 0x00c0: None, - 0x00c1: 0x0621, # ARABIC LETTER HAMZA - 0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE - 0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE - 0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE - 0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW - 0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE - 0x00c7: 0x0627, # ARABIC LETTER ALEF - 0x00c8: 0x0628, # ARABIC LETTER BEH - 0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA - 0x00ca: 0x062a, # ARABIC LETTER TEH - 0x00cb: 0x062b, # ARABIC LETTER THEH - 0x00cc: 0x062c, # ARABIC 
LETTER JEEM - 0x00cd: 0x062d, # ARABIC LETTER HAH - 0x00ce: 0x062e, # ARABIC LETTER KHAH - 0x00cf: 0x062f, # ARABIC LETTER DAL - 0x00d0: 0x0630, # ARABIC LETTER THAL - 0x00d1: 0x0631, # ARABIC LETTER REH - 0x00d2: 0x0632, # ARABIC LETTER ZAIN - 0x00d3: 0x0633, # ARABIC LETTER SEEN - 0x00d4: 0x0634, # ARABIC LETTER SHEEN - 0x00d5: 0x0635, # ARABIC LETTER SAD - 0x00d6: 0x0636, # ARABIC LETTER DAD - 0x00d7: 0x0637, # ARABIC LETTER TAH - 0x00d8: 0x0638, # ARABIC LETTER ZAH - 0x00d9: 0x0639, # ARABIC LETTER AIN - 0x00da: 0x063a, # ARABIC LETTER GHAIN - 0x00db: None, - 0x00dc: None, - 0x00dd: None, - 0x00de: None, - 0x00df: None, - 0x00e0: 0x0640, # ARABIC TATWEEL - 0x00e1: 0x0641, # ARABIC LETTER FEH - 0x00e2: 0x0642, # ARABIC LETTER QAF - 0x00e3: 0x0643, # ARABIC LETTER KAF - 0x00e4: 0x0644, # ARABIC LETTER LAM - 0x00e5: 0x0645, # ARABIC LETTER MEEM - 0x00e6: 0x0646, # ARABIC LETTER NOON - 0x00e7: 0x0647, # ARABIC LETTER HEH - 0x00e8: 0x0648, # ARABIC LETTER WAW - 0x00e9: 0x0649, # ARABIC LETTER ALEF MAKSURA - 0x00ea: 0x064a, # ARABIC LETTER YEH - 0x00eb: 0x064b, # ARABIC FATHATAN - 0x00ec: 0x064c, # ARABIC DAMMATAN - 0x00ed: 0x064d, # ARABIC KASRATAN - 0x00ee: 0x064e, # ARABIC FATHA - 0x00ef: 0x064f, # ARABIC DAMMA - 0x00f0: 0x0650, # ARABIC KASRA - 0x00f1: 0x0651, # ARABIC SHADDA - 0x00f2: 0x0652, # ARABIC SUKUN - 0x00f3: None, - 0x00f4: None, - 0x00f5: None, - 0x00f6: None, - 0x00f7: None, - 0x00f8: None, - 0x00f9: None, - 0x00fa: None, - 0x00fb: None, - 0x00fc: None, - 0x00fd: None, - 0x00fe: None, - 0x00ff: None, + 0x00a1: None, + 0x00a2: None, + 0x00a3: None, + 0x00a5: None, + 0x00a6: None, + 0x00a7: None, + 0x00a8: None, + 0x00a9: None, + 0x00aa: None, + 0x00ab: None, + 0x00ac: 0x060c, # ARABIC COMMA + 0x00ae: None, + 0x00af: None, + 0x00b0: None, + 0x00b1: None, + 0x00b2: None, + 0x00b3: None, + 0x00b4: None, + 0x00b5: None, + 0x00b6: None, + 0x00b7: None, + 0x00b8: None, + 0x00b9: None, + 0x00ba: None, + 0x00bb: 0x061b, # ARABIC SEMICOLON + 0x00bc: None, + 
0x00bd: None, + 0x00be: None, + 0x00bf: 0x061f, # ARABIC QUESTION MARK + 0x00c0: None, + 0x00c1: 0x0621, # ARABIC LETTER HAMZA + 0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE + 0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE + 0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE + 0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW + 0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE + 0x00c7: 0x0627, # ARABIC LETTER ALEF + 0x00c8: 0x0628, # ARABIC LETTER BEH + 0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA + 0x00ca: 0x062a, # ARABIC LETTER TEH + 0x00cb: 0x062b, # ARABIC LETTER THEH + 0x00cc: 0x062c, # ARABIC LETTER JEEM + 0x00cd: 0x062d, # ARABIC LETTER HAH + 0x00ce: 0x062e, # ARABIC LETTER KHAH + 0x00cf: 0x062f, # ARABIC LETTER DAL + 0x00d0: 0x0630, # ARABIC LETTER THAL + 0x00d1: 0x0631, # ARABIC LETTER REH + 0x00d2: 0x0632, # ARABIC LETTER ZAIN + 0x00d3: 0x0633, # ARABIC LETTER SEEN + 0x00d4: 0x0634, # ARABIC LETTER SHEEN + 0x00d5: 0x0635, # ARABIC LETTER SAD + 0x00d6: 0x0636, # ARABIC LETTER DAD + 0x00d7: 0x0637, # ARABIC LETTER TAH + 0x00d8: 0x0638, # ARABIC LETTER ZAH + 0x00d9: 0x0639, # ARABIC LETTER AIN + 0x00da: 0x063a, # ARABIC LETTER GHAIN + 0x00db: None, + 0x00dc: None, + 0x00dd: None, + 0x00de: None, + 0x00df: None, + 0x00e0: 0x0640, # ARABIC TATWEEL + 0x00e1: 0x0641, # ARABIC LETTER FEH + 0x00e2: 0x0642, # ARABIC LETTER QAF + 0x00e3: 0x0643, # ARABIC LETTER KAF + 0x00e4: 0x0644, # ARABIC LETTER LAM + 0x00e5: 0x0645, # ARABIC LETTER MEEM + 0x00e6: 0x0646, # ARABIC LETTER NOON + 0x00e7: 0x0647, # ARABIC LETTER HEH + 0x00e8: 0x0648, # ARABIC LETTER WAW + 0x00e9: 0x0649, # ARABIC LETTER ALEF MAKSURA + 0x00ea: 0x064a, # ARABIC LETTER YEH + 0x00eb: 0x064b, # ARABIC FATHATAN + 0x00ec: 0x064c, # ARABIC DAMMATAN + 0x00ed: 0x064d, # ARABIC KASRATAN + 0x00ee: 0x064e, # ARABIC FATHA + 0x00ef: 0x064f, # ARABIC DAMMA + 0x00f0: 0x0650, # ARABIC KASRA + 0x00f1: 0x0651, # ARABIC SHADDA + 0x00f2: 0x0652, # ARABIC SUKUN + 0x00f3: None, + 0x00f4: None, + 
0x00f5: None, + 0x00f6: None, + 0x00f7: None, + 0x00f8: None, + 0x00f9: None, + 0x00fa: None, + 0x00fb: None, + 0x00fc: None, + 0x00fd: None, + 0x00fe: None, + 0x00ff: None, }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> <control> + u'\x81' # 0x0081 -> <control> + u'\x82' # 0x0082 -> <control> + u'\x83' # 0x0083 -> <control> + u'\x84' # 0x0084 -> <control> + u'\x85' # 0x0085 -> <control> + u'\x86' # 0x0086 -> <control> + u'\x87' # 0x0087 -> <control> + u'\x88' # 0x0088 -> <control> + u'\x89' # 0x0089 -> <control> + u'\x8a' # 0x008a -> <control> + u'\x8b' # 0x008b -> <control> + u'\x8c' # 0x008c -> <control> + u'\x8d' # 0x008d -> <control> + u'\x8e' # 0x008e -> <control> + u'\x8f' # 0x008f -> <control> + u'\x90' # 0x0090 -> <control> + u'\x91' # 0x0091 -> <control> + u'\x92' # 0x0092 -> <control> + u'\x93' # 0x0093 -> <control> + u'\x94' # 0x0094 -> <control> + u'\x95' # 0x0095 -> <control> + u'\x96' # 0x0096 -> <control> + u'\x97' # 0x0097 -> <control> + u'\x98' # 0x0098 -> <control> + u'\x99' # 0x0099 -> <control> + u'\x9a' # 0x009a -> <control> + u'\x9b' # 0x009b -> <control> + u'\x9c' # 0x009c -> 
<control> + u'\x9d' # 0x009d -> <control> + u'\x9e' # 0x009e -> <control> + u'\x9f' # 0x009f -> <control> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\u060c' # 0x00ac -> ARABIC COMMA + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\u061b' # 0x00bb -> ARABIC SEMICOLON + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\u061f' # 0x00bf -> ARABIC QUESTION MARK + u'\ufffe' + u'\u0621' # 0x00c1 -> ARABIC LETTER HAMZA + u'\u0622' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE + u'\u0623' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE + u'\u0624' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE + u'\u0625' # 0x00c5 -> ARABIC LETTER ALEF WITH HAMZA BELOW + u'\u0626' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE + u'\u0627' # 0x00c7 -> ARABIC LETTER ALEF + u'\u0628' # 0x00c8 -> ARABIC LETTER BEH + u'\u0629' # 0x00c9 -> ARABIC LETTER TEH MARBUTA + u'\u062a' # 0x00ca -> ARABIC LETTER TEH + u'\u062b' # 0x00cb -> ARABIC LETTER THEH + u'\u062c' # 0x00cc -> ARABIC LETTER JEEM + u'\u062d' # 0x00cd -> ARABIC LETTER HAH + u'\u062e' # 0x00ce -> ARABIC LETTER KHAH + u'\u062f' # 0x00cf -> ARABIC LETTER DAL + u'\u0630' # 0x00d0 -> ARABIC LETTER THAL + u'\u0631' # 0x00d1 -> ARABIC LETTER REH + u'\u0632' # 0x00d2 -> ARABIC LETTER ZAIN + u'\u0633' # 0x00d3 -> ARABIC LETTER SEEN + u'\u0634' # 0x00d4 -> ARABIC LETTER SHEEN + u'\u0635' # 0x00d5 -> ARABIC LETTER SAD + u'\u0636' # 0x00d6 -> ARABIC LETTER DAD + u'\u0637' # 0x00d7 -> ARABIC LETTER TAH + u'\u0638' # 0x00d8 -> ARABIC LETTER ZAH + u'\u0639' # 0x00d9 -> ARABIC LETTER AIN + u'\u063a' # 0x00da -> ARABIC LETTER GHAIN + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\u0640' # 0x00e0 -> ARABIC TATWEEL + u'\u0641' # 0x00e1 -> ARABIC LETTER 
FEH + u'\u0642' # 0x00e2 -> ARABIC LETTER QAF + u'\u0643' # 0x00e3 -> ARABIC LETTER KAF + u'\u0644' # 0x00e4 -> ARABIC LETTER LAM + u'\u0645' # 0x00e5 -> ARABIC LETTER MEEM + u'\u0646' # 0x00e6 -> ARABIC LETTER NOON + u'\u0647' # 0x00e7 -> ARABIC LETTER HEH + u'\u0648' # 0x00e8 -> ARABIC LETTER WAW + u'\u0649' # 0x00e9 -> ARABIC LETTER ALEF MAKSURA + u'\u064a' # 0x00ea -> ARABIC LETTER YEH + u'\u064b' # 0x00eb -> ARABIC FATHATAN + u'\u064c' # 0x00ec -> ARABIC DAMMATAN + u'\u064d' # 0x00ed -> ARABIC KASRATAN + u'\u064e' # 0x00ee -> ARABIC FATHA + u'\u064f' # 0x00ef -> ARABIC DAMMA + u'\u0650' # 0x00f0 -> ARABIC KASRA + u'\u0651' # 0x00f1 -> ARABIC SHADDA + u'\u0652' # 0x00f2 -> ARABIC SUKUN + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # 
RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER 
U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # <control> + 0x0081: 0x0081, # <control> + 0x0082: 0x0082, # <control> + 0x0083: 0x0083, # <control> + 0x0084: 0x0084, # <control> + 0x0085: 0x0085, # <control> + 0x0086: 0x0086, # <control> + 0x0087: 0x0087, # <control> + 0x0088: 0x0088, # <control> + 0x0089: 0x0089, # <control> + 0x008a: 0x008a, # <control> + 
0x008b: 0x008b, # <control> + 0x008c: 0x008c, # <control> + 0x008d: 0x008d, # <control> + 0x008e: 0x008e, # <control> + 0x008f: 0x008f, # <control> + 0x0090: 0x0090, # <control> + 0x0091: 0x0091, # <control> + 0x0092: 0x0092, # <control> + 0x0093: 0x0093, # <control> + 0x0094: 0x0094, # <control> + 0x0095: 0x0095, # <control> + 0x0096: 0x0096, # <control> + 0x0097: 0x0097, # <control> + 0x0098: 0x0098, # <control> + 0x0099: 0x0099, # <control> + 0x009a: 0x009a, # <control> + 0x009b: 0x009b, # <control> + 0x009c: 0x009c, # <control> + 0x009d: 0x009d, # <control> + 0x009e: 0x009e, # <control> + 0x009f: 0x009f, # <control> + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x060c: 0x00ac, # ARABIC COMMA + 0x061b: 0x00bb, # ARABIC SEMICOLON + 0x061f: 0x00bf, # ARABIC QUESTION MARK + 0x0621: 0x00c1, # ARABIC LETTER HAMZA + 0x0622: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE + 0x0623: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE + 0x0624: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE + 0x0625: 0x00c5, # ARABIC LETTER ALEF WITH HAMZA BELOW + 0x0626: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE + 0x0627: 0x00c7, # ARABIC LETTER ALEF + 0x0628: 0x00c8, # ARABIC LETTER BEH + 0x0629: 0x00c9, # ARABIC LETTER TEH MARBUTA + 0x062a: 0x00ca, # ARABIC LETTER TEH + 0x062b: 0x00cb, # ARABIC LETTER THEH + 0x062c: 0x00cc, # ARABIC LETTER JEEM + 0x062d: 0x00cd, # ARABIC LETTER HAH + 0x062e: 0x00ce, # ARABIC LETTER KHAH + 0x062f: 0x00cf, # ARABIC LETTER DAL + 0x0630: 0x00d0, # ARABIC LETTER THAL + 0x0631: 0x00d1, # ARABIC LETTER REH + 0x0632: 0x00d2, # ARABIC LETTER ZAIN + 0x0633: 0x00d3, # ARABIC LETTER SEEN + 0x0634: 0x00d4, # ARABIC LETTER SHEEN + 0x0635: 0x00d5, # ARABIC LETTER SAD + 0x0636: 0x00d6, # ARABIC LETTER DAD + 0x0637: 0x00d7, # ARABIC LETTER TAH + 0x0638: 0x00d8, # ARABIC LETTER ZAH + 0x0639: 0x00d9, # ARABIC LETTER AIN + 0x063a: 0x00da, # ARABIC LETTER GHAIN + 0x0640: 0x00e0, # ARABIC TATWEEL + 0x0641: 0x00e1, # ARABIC 
LETTER FEH + 0x0642: 0x00e2, # ARABIC LETTER QAF + 0x0643: 0x00e3, # ARABIC LETTER KAF + 0x0644: 0x00e4, # ARABIC LETTER LAM + 0x0645: 0x00e5, # ARABIC LETTER MEEM + 0x0646: 0x00e6, # ARABIC LETTER NOON + 0x0647: 0x00e7, # ARABIC LETTER HEH + 0x0648: 0x00e8, # ARABIC LETTER WAW + 0x0649: 0x00e9, # ARABIC LETTER ALEF MAKSURA + 0x064a: 0x00ea, # ARABIC LETTER YEH + 0x064b: 0x00eb, # ARABIC FATHATAN + 0x064c: 0x00ec, # ARABIC DAMMATAN + 0x064d: 0x00ed, # ARABIC KASRATAN + 0x064e: 0x00ee, # ARABIC FATHA + 0x064f: 0x00ef, # ARABIC DAMMA + 0x0650: 0x00f0, # ARABIC KASRA + 0x0651: 0x00f1, # ARABIC SHADDA + 0x0652: 0x00f2, # ARABIC SUKUN +}
\ No newline at end of file diff --git a/Lib/encodings/iso8859_7.py b/Lib/encodings/iso8859_7.py index 2530c68..7d14c3e 100644 --- a/Lib/encodings/iso8859_7.py +++ b/Lib/encodings/iso8859_7.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-7.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-7.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,88 +32,603 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x00a2: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00a4: None, - 0x00a5: None, - 0x00aa: None, - 0x00ae: None, - 0x00af: 0x2015, # HORIZONTAL BAR - 0x00b4: 0x0384, # GREEK TONOS - 0x00b5: 0x0385, # GREEK DIALYTIKA TONOS - 0x00b6: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x00b8: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x00b9: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x00ba: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x00bc: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x00be: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x00bf: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x00c0: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x00c1: 0x0391, # GREEK CAPITAL LETTER ALPHA - 0x00c2: 0x0392, # GREEK CAPITAL LETTER BETA - 0x00c3: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00c4: 0x0394, # GREEK CAPITAL LETTER DELTA - 0x00c5: 0x0395, # GREEK CAPITAL LETTER EPSILON - 0x00c6: 0x0396, # GREEK CAPITAL LETTER ZETA - 0x00c7: 0x0397, # GREEK CAPITAL LETTER ETA - 0x00c8: 0x0398, 
# GREEK CAPITAL LETTER THETA - 0x00c9: 0x0399, # GREEK CAPITAL LETTER IOTA - 0x00ca: 0x039a, # GREEK CAPITAL LETTER KAPPA - 0x00cb: 0x039b, # GREEK CAPITAL LETTER LAMDA - 0x00cc: 0x039c, # GREEK CAPITAL LETTER MU - 0x00cd: 0x039d, # GREEK CAPITAL LETTER NU - 0x00ce: 0x039e, # GREEK CAPITAL LETTER XI - 0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON - 0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI - 0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO - 0x00d2: None, - 0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU - 0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON - 0x00d6: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00d7: 0x03a7, # GREEK CAPITAL LETTER CHI - 0x00d8: 0x03a8, # GREEK CAPITAL LETTER PSI - 0x00d9: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00da: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x00db: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x00dc: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x00dd: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x00de: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS - 0x00df: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS - 0x00e0: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e2: 0x03b2, # GREEK SMALL LETTER BETA - 0x00e3: 0x03b3, # GREEK SMALL LETTER GAMMA - 0x00e4: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00e6: 0x03b6, # GREEK SMALL LETTER ZETA - 0x00e7: 0x03b7, # GREEK SMALL LETTER ETA - 0x00e8: 0x03b8, # GREEK SMALL LETTER THETA - 0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA - 0x00ea: 0x03ba, # GREEK SMALL LETTER KAPPA - 0x00eb: 0x03bb, # GREEK SMALL LETTER LAMDA - 0x00ec: 0x03bc, # GREEK SMALL LETTER MU - 0x00ed: 0x03bd, # GREEK SMALL LETTER NU - 0x00ee: 0x03be, # GREEK SMALL LETTER XI - 0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON - 0x00f0: 0x03c0, # GREEK SMALL LETTER PI - 0x00f1: 0x03c1, # GREEK SMALL LETTER RHO - 0x00f2: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA 
- 0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00f4: 0x03c4, # GREEK SMALL LETTER TAU - 0x00f5: 0x03c5, # GREEK SMALL LETTER UPSILON - 0x00f6: 0x03c6, # GREEK SMALL LETTER PHI - 0x00f7: 0x03c7, # GREEK SMALL LETTER CHI - 0x00f8: 0x03c8, # GREEK SMALL LETTER PSI - 0x00f9: 0x03c9, # GREEK SMALL LETTER OMEGA - 0x00fa: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x00ff: None, + 0x00a1: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x00a2: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00a4: 0x20ac, # EURO SIGN + 0x00a5: 0x20af, # DRACHMA SIGN + 0x00aa: 0x037a, # GREEK YPOGEGRAMMENI + 0x00ae: None, + 0x00af: 0x2015, # HORIZONTAL BAR + 0x00b4: 0x0384, # GREEK TONOS + 0x00b5: 0x0385, # GREEK DIALYTIKA TONOS + 0x00b6: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x00b8: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x00b9: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x00ba: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x00bc: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x00be: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x00bf: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x00c0: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x00c1: 0x0391, # GREEK CAPITAL LETTER ALPHA + 0x00c2: 0x0392, # GREEK CAPITAL LETTER BETA + 0x00c3: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00c4: 0x0394, # GREEK CAPITAL LETTER DELTA + 0x00c5: 0x0395, # GREEK CAPITAL LETTER EPSILON + 0x00c6: 0x0396, # GREEK CAPITAL LETTER ZETA + 0x00c7: 0x0397, # GREEK CAPITAL LETTER ETA + 0x00c8: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00c9: 0x0399, # GREEK CAPITAL LETTER IOTA + 0x00ca: 0x039a, # GREEK CAPITAL LETTER KAPPA + 0x00cb: 0x039b, # GREEK CAPITAL LETTER LAMDA + 0x00cc: 0x039c, # GREEK CAPITAL LETTER MU + 0x00cd: 0x039d, # 
GREEK CAPITAL LETTER NU + 0x00ce: 0x039e, # GREEK CAPITAL LETTER XI + 0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON + 0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI + 0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO + 0x00d2: None, + 0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU + 0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON + 0x00d6: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00d7: 0x03a7, # GREEK CAPITAL LETTER CHI + 0x00d8: 0x03a8, # GREEK CAPITAL LETTER PSI + 0x00d9: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00da: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x00db: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x00dc: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x00dd: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x00de: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS + 0x00df: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS + 0x00e0: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e2: 0x03b2, # GREEK SMALL LETTER BETA + 0x00e3: 0x03b3, # GREEK SMALL LETTER GAMMA + 0x00e4: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00e6: 0x03b6, # GREEK SMALL LETTER ZETA + 0x00e7: 0x03b7, # GREEK SMALL LETTER ETA + 0x00e8: 0x03b8, # GREEK SMALL LETTER THETA + 0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA + 0x00ea: 0x03ba, # GREEK SMALL LETTER KAPPA + 0x00eb: 0x03bb, # GREEK SMALL LETTER LAMDA + 0x00ec: 0x03bc, # GREEK SMALL LETTER MU + 0x00ed: 0x03bd, # GREEK SMALL LETTER NU + 0x00ee: 0x03be, # GREEK SMALL LETTER XI + 0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON + 0x00f0: 0x03c0, # GREEK SMALL LETTER PI + 0x00f1: 0x03c1, # GREEK SMALL LETTER RHO + 0x00f2: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA + 0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00f4: 0x03c4, # GREEK SMALL LETTER TAU + 0x00f5: 0x03c5, # GREEK SMALL LETTER UPSILON + 0x00f6: 0x03c6, # GREEK SMALL LETTER PHI + 0x00f7: 0x03c7, # GREEK SMALL LETTER CHI + 0x00f8: 
0x03c8, # GREEK SMALL LETTER PSI + 0x00f9: 0x03c9, # GREEK SMALL LETTER OMEGA + 0x00fa: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x00ff: None, }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' 
# 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 
0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> <control> + u'\x81' # 0x0081 -> <control> + u'\x82' # 0x0082 -> <control> + u'\x83' # 0x0083 -> <control> + u'\x84' # 0x0084 -> <control> + u'\x85' # 0x0085 -> <control> + u'\x86' # 0x0086 -> <control> + u'\x87' # 0x0087 -> <control> + u'\x88' # 0x0088 -> <control> + u'\x89' # 0x0089 -> <control> + u'\x8a' # 0x008a -> <control> + u'\x8b' # 0x008b -> <control> + u'\x8c' # 0x008c -> <control> + u'\x8d' # 0x008d -> <control> + u'\x8e' # 0x008e -> <control> + u'\x8f' # 0x008f -> 
<control> + u'\x90' # 0x0090 -> <control> + u'\x91' # 0x0091 -> <control> + u'\x92' # 0x0092 -> <control> + u'\x93' # 0x0093 -> <control> + u'\x94' # 0x0094 -> <control> + u'\x95' # 0x0095 -> <control> + u'\x96' # 0x0096 -> <control> + u'\x97' # 0x0097 -> <control> + u'\x98' # 0x0098 -> <control> + u'\x99' # 0x0099 -> <control> + u'\x9a' # 0x009a -> <control> + u'\x9b' # 0x009b -> <control> + u'\x9c' # 0x009c -> <control> + u'\x9d' # 0x009d -> <control> + u'\x9e' # 0x009e -> <control> + u'\x9f' # 0x009f -> <control> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u2018' # 0x00a1 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x00a2 -> RIGHT SINGLE QUOTATION MARK + u'\xa3' # 0x00a3 -> POUND SIGN + u'\u20ac' # 0x00a4 -> EURO SIGN + u'\u20af' # 0x00a5 -> DRACHMA SIGN + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u037a' # 0x00aa -> GREEK YPOGEGRAMMENI + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\ufffe' + u'\u2015' # 0x00af -> HORIZONTAL BAR + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\u0384' # 0x00b4 -> GREEK TONOS + u'\u0385' # 0x00b5 -> GREEK DIALYTIKA TONOS + u'\u0386' # 0x00b6 -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\u0388' # 0x00b8 -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u0389' # 0x00b9 -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\u038a' # 0x00ba -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u038c' # 0x00bc -> GREEK CAPITAL LETTER OMICRON WITH TONOS + u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\u038e' # 0x00be -> GREEK CAPITAL LETTER UPSILON WITH TONOS + u'\u038f' # 0x00bf -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'\u0390' # 0x00c0 -> GREEK SMALL LETTER IOTA WITH 
DIALYTIKA AND TONOS + u'\u0391' # 0x00c1 -> GREEK CAPITAL LETTER ALPHA + u'\u0392' # 0x00c2 -> GREEK CAPITAL LETTER BETA + u'\u0393' # 0x00c3 -> GREEK CAPITAL LETTER GAMMA + u'\u0394' # 0x00c4 -> GREEK CAPITAL LETTER DELTA + u'\u0395' # 0x00c5 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0x00c6 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0x00c7 -> GREEK CAPITAL LETTER ETA + u'\u0398' # 0x00c8 -> GREEK CAPITAL LETTER THETA + u'\u0399' # 0x00c9 -> GREEK CAPITAL LETTER IOTA + u'\u039a' # 0x00ca -> GREEK CAPITAL LETTER KAPPA + u'\u039b' # 0x00cb -> GREEK CAPITAL LETTER LAMDA + u'\u039c' # 0x00cc -> GREEK CAPITAL LETTER MU + u'\u039d' # 0x00cd -> GREEK CAPITAL LETTER NU + u'\u039e' # 0x00ce -> GREEK CAPITAL LETTER XI + u'\u039f' # 0x00cf -> GREEK CAPITAL LETTER OMICRON + u'\u03a0' # 0x00d0 -> GREEK CAPITAL LETTER PI + u'\u03a1' # 0x00d1 -> GREEK CAPITAL LETTER RHO + u'\ufffe' + u'\u03a3' # 0x00d3 -> GREEK CAPITAL LETTER SIGMA + u'\u03a4' # 0x00d4 -> GREEK CAPITAL LETTER TAU + u'\u03a5' # 0x00d5 -> GREEK CAPITAL LETTER UPSILON + u'\u03a6' # 0x00d6 -> GREEK CAPITAL LETTER PHI + u'\u03a7' # 0x00d7 -> GREEK CAPITAL LETTER CHI + u'\u03a8' # 0x00d8 -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0x00d9 -> GREEK CAPITAL LETTER OMEGA + u'\u03aa' # 0x00da -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\u03ab' # 0x00db -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'\u03ac' # 0x00dc -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\u03ad' # 0x00dd -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0x00de -> GREEK SMALL LETTER ETA WITH TONOS + u'\u03af' # 0x00df -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03b0' # 0x00e0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + u'\u03b1' # 0x00e1 -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 0x00e2 -> GREEK SMALL LETTER BETA + u'\u03b3' # 0x00e3 -> GREEK SMALL LETTER GAMMA + u'\u03b4' # 0x00e4 -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0x00e5 -> GREEK SMALL LETTER EPSILON + u'\u03b6' # 0x00e6 -> GREEK SMALL LETTER ZETA + u'\u03b7' # 0x00e7 
-> GREEK SMALL LETTER ETA + u'\u03b8' # 0x00e8 -> GREEK SMALL LETTER THETA + u'\u03b9' # 0x00e9 -> GREEK SMALL LETTER IOTA + u'\u03ba' # 0x00ea -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0x00eb -> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0x00ec -> GREEK SMALL LETTER MU + u'\u03bd' # 0x00ed -> GREEK SMALL LETTER NU + u'\u03be' # 0x00ee -> GREEK SMALL LETTER XI + u'\u03bf' # 0x00ef -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0x00f0 -> GREEK SMALL LETTER PI + u'\u03c1' # 0x00f1 -> GREEK SMALL LETTER RHO + u'\u03c2' # 0x00f2 -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c3' # 0x00f3 -> GREEK SMALL LETTER SIGMA + u'\u03c4' # 0x00f4 -> GREEK SMALL LETTER TAU + u'\u03c5' # 0x00f5 -> GREEK SMALL LETTER UPSILON + u'\u03c6' # 0x00f6 -> GREEK SMALL LETTER PHI + u'\u03c7' # 0x00f7 -> GREEK SMALL LETTER CHI + u'\u03c8' # 0x00f8 -> GREEK SMALL LETTER PSI + u'\u03c9' # 0x00f9 -> GREEK SMALL LETTER OMEGA + u'\u03ca' # 0x00fa -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u03cb' # 0x00fb -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u03cc' # 0x00fc -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u03cd' # 0x00fd -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u03ce' # 0x00fe -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\ufffe' +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 
0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 
0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 
0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # <control> + 0x0081: 0x0081, # <control> + 0x0082: 0x0082, # <control> + 0x0083: 0x0083, # <control> + 0x0084: 0x0084, # <control> + 0x0085: 0x0085, # <control> + 0x0086: 0x0086, # <control> + 0x0087: 0x0087, # <control> + 0x0088: 0x0088, # <control> + 0x0089: 0x0089, # <control> + 0x008a: 0x008a, # <control> + 0x008b: 0x008b, # <control> + 0x008c: 0x008c, # <control> + 0x008d: 0x008d, # <control> + 0x008e: 0x008e, # <control> + 0x008f: 0x008f, # <control> + 0x0090: 0x0090, # <control> + 0x0091: 0x0091, # <control> + 0x0092: 0x0092, # <control> + 0x0093: 0x0093, # <control> + 0x0094: 0x0094, # <control> + 0x0095: 0x0095, # <control> + 0x0096: 0x0096, # <control> + 0x0097: 0x0097, # <control> + 0x0098: 0x0098, # <control> + 0x0099: 0x0099, # <control> + 0x009a: 0x009a, # <control> + 0x009b: 0x009b, # <control> + 0x009c: 0x009c, # <control> + 0x009d: 0x009d, # <control> + 0x009e: 0x009e, # <control> + 0x009f: 0x009f, # <control> + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a3: 0x00a3, # POUND SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x037a: 0x00aa, # GREEK YPOGEGRAMMENI + 0x0384: 0x00b4, # GREEK TONOS + 0x0385: 0x00b5, # GREEK DIALYTIKA TONOS + 0x0386: 0x00b6, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0388: 0x00b8, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x0389: 0x00b9, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x038a: 0x00ba, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x038c: 0x00bc, # GREEK CAPITAL LETTER OMICRON 
WITH TONOS + 0x038e: 0x00be, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x038f: 0x00bf, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0390: 0x00c0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x0391: 0x00c1, # GREEK CAPITAL LETTER ALPHA + 0x0392: 0x00c2, # GREEK CAPITAL LETTER BETA + 0x0393: 0x00c3, # GREEK CAPITAL LETTER GAMMA + 0x0394: 0x00c4, # GREEK CAPITAL LETTER DELTA + 0x0395: 0x00c5, # GREEK CAPITAL LETTER EPSILON + 0x0396: 0x00c6, # GREEK CAPITAL LETTER ZETA + 0x0397: 0x00c7, # GREEK CAPITAL LETTER ETA + 0x0398: 0x00c8, # GREEK CAPITAL LETTER THETA + 0x0399: 0x00c9, # GREEK CAPITAL LETTER IOTA + 0x039a: 0x00ca, # GREEK CAPITAL LETTER KAPPA + 0x039b: 0x00cb, # GREEK CAPITAL LETTER LAMDA + 0x039c: 0x00cc, # GREEK CAPITAL LETTER MU + 0x039d: 0x00cd, # GREEK CAPITAL LETTER NU + 0x039e: 0x00ce, # GREEK CAPITAL LETTER XI + 0x039f: 0x00cf, # GREEK CAPITAL LETTER OMICRON + 0x03a0: 0x00d0, # GREEK CAPITAL LETTER PI + 0x03a1: 0x00d1, # GREEK CAPITAL LETTER RHO + 0x03a3: 0x00d3, # GREEK CAPITAL LETTER SIGMA + 0x03a4: 0x00d4, # GREEK CAPITAL LETTER TAU + 0x03a5: 0x00d5, # GREEK CAPITAL LETTER UPSILON + 0x03a6: 0x00d6, # GREEK CAPITAL LETTER PHI + 0x03a7: 0x00d7, # GREEK CAPITAL LETTER CHI + 0x03a8: 0x00d8, # GREEK CAPITAL LETTER PSI + 0x03a9: 0x00d9, # GREEK CAPITAL LETTER OMEGA + 0x03aa: 0x00da, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x03ab: 0x00db, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x03ac: 0x00dc, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x03ad: 0x00dd, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x03ae: 0x00de, # GREEK SMALL LETTER ETA WITH TONOS + 0x03af: 0x00df, # GREEK SMALL LETTER IOTA WITH TONOS + 0x03b0: 0x00e0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x03b1: 0x00e1, # GREEK SMALL LETTER ALPHA + 0x03b2: 0x00e2, # GREEK SMALL LETTER BETA + 0x03b3: 0x00e3, # GREEK SMALL LETTER GAMMA + 0x03b4: 0x00e4, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00e5, # GREEK SMALL LETTER EPSILON + 0x03b6: 0x00e6, # GREEK SMALL LETTER ZETA + 
0x03b7: 0x00e7, # GREEK SMALL LETTER ETA + 0x03b8: 0x00e8, # GREEK SMALL LETTER THETA + 0x03b9: 0x00e9, # GREEK SMALL LETTER IOTA + 0x03ba: 0x00ea, # GREEK SMALL LETTER KAPPA + 0x03bb: 0x00eb, # GREEK SMALL LETTER LAMDA + 0x03bc: 0x00ec, # GREEK SMALL LETTER MU + 0x03bd: 0x00ed, # GREEK SMALL LETTER NU + 0x03be: 0x00ee, # GREEK SMALL LETTER XI + 0x03bf: 0x00ef, # GREEK SMALL LETTER OMICRON + 0x03c0: 0x00f0, # GREEK SMALL LETTER PI + 0x03c1: 0x00f1, # GREEK SMALL LETTER RHO + 0x03c2: 0x00f2, # GREEK SMALL LETTER FINAL SIGMA + 0x03c3: 0x00f3, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00f4, # GREEK SMALL LETTER TAU + 0x03c5: 0x00f5, # GREEK SMALL LETTER UPSILON + 0x03c6: 0x00f6, # GREEK SMALL LETTER PHI + 0x03c7: 0x00f7, # GREEK SMALL LETTER CHI + 0x03c8: 0x00f8, # GREEK SMALL LETTER PSI + 0x03c9: 0x00f9, # GREEK SMALL LETTER OMEGA + 0x03ca: 0x00fa, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x03cb: 0x00fb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03cc: 0x00fc, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03cd: 0x00fd, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03ce: 0x00fe, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x2015: 0x00af, # HORIZONTAL BAR + 0x2018: 0x00a1, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x00a2, # RIGHT SINGLE QUOTATION MARK + 0x20ac: 0x00a4, # EURO SIGN + 0x20af: 0x00a5, # DRACHMA SIGN +}
\ No newline at end of file diff --git a/Lib/encodings/iso8859_8.py b/Lib/encodings/iso8859_8.py index d0176ee..2d01db0 100644 --- a/Lib/encodings/iso8859_8.py +++ b/Lib/encodings/iso8859_8.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-8.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-8.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,76 +32,558 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: None, - 0x00aa: 0x00d7, # MULTIPLICATION SIGN - 0x00ba: 0x00f7, # DIVISION SIGN - 0x00bf: None, - 0x00c0: None, - 0x00c1: None, - 0x00c2: None, - 0x00c3: None, - 0x00c4: None, - 0x00c5: None, - 0x00c6: None, - 0x00c7: None, - 0x00c8: None, - 0x00c9: None, - 0x00ca: None, - 0x00cb: None, - 0x00cc: None, - 0x00cd: None, - 0x00ce: None, - 0x00cf: None, - 0x00d0: None, - 0x00d1: None, - 0x00d2: None, - 0x00d3: None, - 0x00d4: None, - 0x00d5: None, - 0x00d6: None, - 0x00d7: None, - 0x00d8: None, - 0x00d9: None, - 0x00da: None, - 0x00db: None, - 0x00dc: None, - 0x00dd: None, - 0x00de: None, - 0x00df: 0x2017, # DOUBLE LOW LINE - 0x00e0: 0x05d0, # HEBREW LETTER ALEF - 0x00e1: 0x05d1, # HEBREW LETTER BET - 0x00e2: 0x05d2, # HEBREW LETTER GIMEL - 0x00e3: 0x05d3, # HEBREW LETTER DALET - 0x00e4: 0x05d4, # HEBREW LETTER HE - 0x00e5: 0x05d5, # HEBREW LETTER VAV - 0x00e6: 0x05d6, # HEBREW LETTER ZAYIN - 0x00e7: 0x05d7, # HEBREW LETTER HET - 0x00e8: 0x05d8, # HEBREW LETTER TET - 0x00e9: 0x05d9, # HEBREW LETTER YOD - 0x00ea: 0x05da, # HEBREW LETTER FINAL KAF - 0x00eb: 0x05db, # 
HEBREW LETTER KAF - 0x00ec: 0x05dc, # HEBREW LETTER LAMED - 0x00ed: 0x05dd, # HEBREW LETTER FINAL MEM - 0x00ee: 0x05de, # HEBREW LETTER MEM - 0x00ef: 0x05df, # HEBREW LETTER FINAL NUN - 0x00f0: 0x05e0, # HEBREW LETTER NUN - 0x00f1: 0x05e1, # HEBREW LETTER SAMEKH - 0x00f2: 0x05e2, # HEBREW LETTER AYIN - 0x00f3: 0x05e3, # HEBREW LETTER FINAL PE - 0x00f4: 0x05e4, # HEBREW LETTER PE - 0x00f5: 0x05e5, # HEBREW LETTER FINAL TSADI - 0x00f6: 0x05e6, # HEBREW LETTER TSADI - 0x00f7: 0x05e7, # HEBREW LETTER QOF - 0x00f8: 0x05e8, # HEBREW LETTER RESH - 0x00f9: 0x05e9, # HEBREW LETTER SHIN - 0x00fa: 0x05ea, # HEBREW LETTER TAV - 0x00fb: None, - 0x00fc: None, - 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK - 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK - 0x00ff: None, + 0x00a1: None, + 0x00aa: 0x00d7, # MULTIPLICATION SIGN + 0x00ba: 0x00f7, # DIVISION SIGN + 0x00bf: None, + 0x00c0: None, + 0x00c1: None, + 0x00c2: None, + 0x00c3: None, + 0x00c4: None, + 0x00c5: None, + 0x00c6: None, + 0x00c7: None, + 0x00c8: None, + 0x00c9: None, + 0x00ca: None, + 0x00cb: None, + 0x00cc: None, + 0x00cd: None, + 0x00ce: None, + 0x00cf: None, + 0x00d0: None, + 0x00d1: None, + 0x00d2: None, + 0x00d3: None, + 0x00d4: None, + 0x00d5: None, + 0x00d6: None, + 0x00d7: None, + 0x00d8: None, + 0x00d9: None, + 0x00da: None, + 0x00db: None, + 0x00dc: None, + 0x00dd: None, + 0x00de: None, + 0x00df: 0x2017, # DOUBLE LOW LINE + 0x00e0: 0x05d0, # HEBREW LETTER ALEF + 0x00e1: 0x05d1, # HEBREW LETTER BET + 0x00e2: 0x05d2, # HEBREW LETTER GIMEL + 0x00e3: 0x05d3, # HEBREW LETTER DALET + 0x00e4: 0x05d4, # HEBREW LETTER HE + 0x00e5: 0x05d5, # HEBREW LETTER VAV + 0x00e6: 0x05d6, # HEBREW LETTER ZAYIN + 0x00e7: 0x05d7, # HEBREW LETTER HET + 0x00e8: 0x05d8, # HEBREW LETTER TET + 0x00e9: 0x05d9, # HEBREW LETTER YOD + 0x00ea: 0x05da, # HEBREW LETTER FINAL KAF + 0x00eb: 0x05db, # HEBREW LETTER KAF + 0x00ec: 0x05dc, # HEBREW LETTER LAMED + 0x00ed: 0x05dd, # HEBREW LETTER FINAL MEM + 0x00ee: 0x05de, # HEBREW LETTER MEM + 0x00ef: 0x05df, # 
HEBREW LETTER FINAL NUN + 0x00f0: 0x05e0, # HEBREW LETTER NUN + 0x00f1: 0x05e1, # HEBREW LETTER SAMEKH + 0x00f2: 0x05e2, # HEBREW LETTER AYIN + 0x00f3: 0x05e3, # HEBREW LETTER FINAL PE + 0x00f4: 0x05e4, # HEBREW LETTER PE + 0x00f5: 0x05e5, # HEBREW LETTER FINAL TSADI + 0x00f6: 0x05e6, # HEBREW LETTER TSADI + 0x00f7: 0x05e7, # HEBREW LETTER QOF + 0x00f8: 0x05e8, # HEBREW LETTER RESH + 0x00f9: 0x05e9, # HEBREW LETTER SHIN + 0x00fa: 0x05ea, # HEBREW LETTER TAV + 0x00fb: None, + 0x00fc: None, + 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK + 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK + 0x00ff: None, }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' 
# 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 
0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> <control> + u'\x81' # 0x0081 -> <control> + u'\x82' # 0x0082 -> <control> + u'\x83' # 0x0083 -> <control> + u'\x84' # 0x0084 -> <control> + u'\x85' # 0x0085 -> <control> + u'\x86' # 0x0086 -> <control> + u'\x87' # 0x0087 -> <control> + u'\x88' # 0x0088 -> <control> + u'\x89' # 0x0089 -> <control> + u'\x8a' # 0x008a -> <control> + u'\x8b' # 0x008b -> <control> + u'\x8c' # 0x008c -> <control> + u'\x8d' # 0x008d -> <control> + u'\x8e' # 0x008e -> <control> + u'\x8f' # 0x008f -> 
<control> + u'\x90' # 0x0090 -> <control> + u'\x91' # 0x0091 -> <control> + u'\x92' # 0x0092 -> <control> + u'\x93' # 0x0093 -> <control> + u'\x94' # 0x0094 -> <control> + u'\x95' # 0x0095 -> <control> + u'\x96' # 0x0096 -> <control> + u'\x97' # 0x0097 -> <control> + u'\x98' # 0x0098 -> <control> + u'\x99' # 0x0099 -> <control> + u'\x9a' # 0x009a -> <control> + u'\x9b' # 0x009b -> <control> + u'\x9c' # 0x009c -> <control> + u'\x9d' # 0x009d -> <control> + u'\x9e' # 0x009e -> <control> + u'\x9f' # 0x009f -> <control> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\ufffe' + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\xa5' # 0x00a5 -> YEN SIGN + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\xd7' # 0x00aa -> MULTIPLICATION SIGN + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\xaf' # 0x00af -> MACRON + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\xb8' # 0x00b8 -> CEDILLA + u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE + u'\xf7' # 0x00ba -> DIVISION SIGN + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0x00bc -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\xbe' # 0x00be -> VULGAR FRACTION THREE QUARTERS + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + 
u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\u2017' # 0x00df -> DOUBLE LOW LINE + u'\u05d0' # 0x00e0 -> HEBREW LETTER ALEF + u'\u05d1' # 0x00e1 -> HEBREW LETTER BET + u'\u05d2' # 0x00e2 -> HEBREW LETTER GIMEL + u'\u05d3' # 0x00e3 -> HEBREW LETTER DALET + u'\u05d4' # 0x00e4 -> HEBREW LETTER HE + u'\u05d5' # 0x00e5 -> HEBREW LETTER VAV + u'\u05d6' # 0x00e6 -> HEBREW LETTER ZAYIN + u'\u05d7' # 0x00e7 -> HEBREW LETTER HET + u'\u05d8' # 0x00e8 -> HEBREW LETTER TET + u'\u05d9' # 0x00e9 -> HEBREW LETTER YOD + u'\u05da' # 0x00ea -> HEBREW LETTER FINAL KAF + u'\u05db' # 0x00eb -> HEBREW LETTER KAF + u'\u05dc' # 0x00ec -> HEBREW LETTER LAMED + u'\u05dd' # 0x00ed -> HEBREW LETTER FINAL MEM + u'\u05de' # 0x00ee -> HEBREW LETTER MEM + u'\u05df' # 0x00ef -> HEBREW LETTER FINAL NUN + u'\u05e0' # 0x00f0 -> HEBREW LETTER NUN + u'\u05e1' # 0x00f1 -> HEBREW LETTER SAMEKH + u'\u05e2' # 0x00f2 -> HEBREW LETTER AYIN + u'\u05e3' # 0x00f3 -> HEBREW LETTER FINAL PE + u'\u05e4' # 0x00f4 -> HEBREW LETTER PE + u'\u05e5' # 0x00f5 -> HEBREW LETTER FINAL TSADI + u'\u05e6' # 0x00f6 -> HEBREW LETTER TSADI + u'\u05e7' # 0x00f7 -> HEBREW LETTER QOF + u'\u05e8' # 0x00f8 -> HEBREW LETTER RESH + u'\u05e9' # 0x00f9 -> HEBREW LETTER SHIN + u'\u05ea' # 0x00fa -> HEBREW LETTER TAV + u'\ufffe' + u'\ufffe' + u'\u200e' # 0x00fd -> LEFT-TO-RIGHT MARK + u'\u200f' # 0x00fe -> RIGHT-TO-LEFT MARK + u'\ufffe' +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 
0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H 
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # 
LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # <control> + 0x0081: 0x0081, # <control> + 0x0082: 0x0082, # <control> + 0x0083: 0x0083, # <control> + 0x0084: 0x0084, # <control> + 0x0085: 0x0085, # <control> + 0x0086: 0x0086, # <control> + 0x0087: 0x0087, # <control> + 0x0088: 0x0088, # <control> + 0x0089: 0x0089, # <control> + 0x008a: 0x008a, # <control> + 0x008b: 0x008b, # <control> + 0x008c: 0x008c, # <control> + 0x008d: 0x008d, # <control> + 0x008e: 0x008e, # <control> + 0x008f: 0x008f, # <control> + 0x0090: 0x0090, # <control> + 0x0091: 0x0091, # <control> + 0x0092: 0x0092, # <control> + 0x0093: 0x0093, # <control> + 0x0094: 0x0094, # <control> + 0x0095: 0x0095, # <control> + 0x0096: 0x0096, # <control> + 0x0097: 0x0097, # <control> + 0x0098: 0x0098, # <control> + 0x0099: 0x0099, # <control> + 0x009a: 0x009a, # <control> + 0x009b: 0x009b, # <control> + 0x009c: 0x009c, # <control> + 0x009d: 0x009d, # <control> + 0x009e: 0x009e, # <control> + 0x009f: 0x009f, # <control> + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a5: 0x00a5, # YEN SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00af: 0x00af, # MACRON + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b8: 0x00b8, # CEDILLA + 0x00b9: 0x00b9, # SUPERSCRIPT ONE + 
0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00be: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00d7: 0x00aa, # MULTIPLICATION SIGN + 0x00f7: 0x00ba, # DIVISION SIGN + 0x05d0: 0x00e0, # HEBREW LETTER ALEF + 0x05d1: 0x00e1, # HEBREW LETTER BET + 0x05d2: 0x00e2, # HEBREW LETTER GIMEL + 0x05d3: 0x00e3, # HEBREW LETTER DALET + 0x05d4: 0x00e4, # HEBREW LETTER HE + 0x05d5: 0x00e5, # HEBREW LETTER VAV + 0x05d6: 0x00e6, # HEBREW LETTER ZAYIN + 0x05d7: 0x00e7, # HEBREW LETTER HET + 0x05d8: 0x00e8, # HEBREW LETTER TET + 0x05d9: 0x00e9, # HEBREW LETTER YOD + 0x05da: 0x00ea, # HEBREW LETTER FINAL KAF + 0x05db: 0x00eb, # HEBREW LETTER KAF + 0x05dc: 0x00ec, # HEBREW LETTER LAMED + 0x05dd: 0x00ed, # HEBREW LETTER FINAL MEM + 0x05de: 0x00ee, # HEBREW LETTER MEM + 0x05df: 0x00ef, # HEBREW LETTER FINAL NUN + 0x05e0: 0x00f0, # HEBREW LETTER NUN + 0x05e1: 0x00f1, # HEBREW LETTER SAMEKH + 0x05e2: 0x00f2, # HEBREW LETTER AYIN + 0x05e3: 0x00f3, # HEBREW LETTER FINAL PE + 0x05e4: 0x00f4, # HEBREW LETTER PE + 0x05e5: 0x00f5, # HEBREW LETTER FINAL TSADI + 0x05e6: 0x00f6, # HEBREW LETTER TSADI + 0x05e7: 0x00f7, # HEBREW LETTER QOF + 0x05e8: 0x00f8, # HEBREW LETTER RESH + 0x05e9: 0x00f9, # HEBREW LETTER SHIN + 0x05ea: 0x00fa, # HEBREW LETTER TAV + 0x200e: 0x00fd, # LEFT-TO-RIGHT MARK + 0x200f: 0x00fe, # RIGHT-TO-LEFT MARK + 0x2017: 0x00df, # DOUBLE LOW LINE +}
\ No newline at end of file diff --git a/Lib/encodings/iso8859_9.py b/Lib/encodings/iso8859_9.py index 28a603f..37fc7f8 100644 --- a/Lib/encodings/iso8859_9.py +++ b/Lib/encodings/iso8859_9.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-9.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-9.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,14 +32,532 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE - 0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE - 0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE + 0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE + 0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> 
VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> <control> + u'\x81' # 0x0081 -> <control> + u'\x82' # 0x0082 -> <control> + u'\x83' # 0x0083 -> <control> + u'\x84' # 0x0084 -> <control> + u'\x85' # 0x0085 -> <control> + u'\x86' # 0x0086 -> <control> + u'\x87' # 0x0087 -> <control> + u'\x88' # 0x0088 -> <control> + u'\x89' # 0x0089 -> <control> + u'\x8a' # 0x008a -> <control> + u'\x8b' # 0x008b -> <control> + u'\x8c' # 0x008c -> <control> + u'\x8d' # 0x008d -> <control> + u'\x8e' # 0x008e -> <control> + u'\x8f' # 0x008f -> <control> + u'\x90' # 0x0090 -> <control> + u'\x91' # 0x0091 -> <control> + u'\x92' # 0x0092 -> <control> + u'\x93' # 0x0093 -> <control> + u'\x94' # 0x0094 -> <control> + u'\x95' # 0x0095 -> <control> + u'\x96' # 0x0096 -> <control> + u'\x97' # 0x0097 -> <control> + u'\x98' # 0x0098 -> <control> + u'\x99' # 0x0099 -> <control> + u'\x9a' # 0x009a -> <control> + u'\x9b' # 0x009b -> <control> + u'\x9c' # 0x009c -> <control> + u'\x9d' # 0x009d -> <control> + u'\x9e' # 0x009e -> <control> + u'\x9f' # 0x009f -> <control> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\xa1' # 0x00a1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\xa5' # 0x00a5 -> YEN SIGN + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\xaa' # 0x00aa -> FEMININE ORDINAL INDICATOR 
+ u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\xaf' # 0x00af -> MACRON + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\xb8' # 0x00b8 -> CEDILLA + u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE + u'\xba' # 0x00ba -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0x00bc -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\xbe' # 0x00be -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0x00bf -> INVERTED QUESTION MARK + u'\xc0' # 0x00c0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0x00c3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x00c5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0x00c6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0x00c7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0x00c8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x00ca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0x00cc -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00cf -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u011e' # 0x00d0 -> LATIN CAPITAL LETTER G WITH BREVE + u'\xd1' # 0x00d1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0x00d2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH 
ACUTE + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0x00d5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\xd8' # 0x00d8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0x00d9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00db -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0130' # 0x00dd -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'\u015e' # 0x00de -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0x00e0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0x00e3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0x00e5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0x00e6 -> LATIN SMALL LETTER AE + u'\xe7' # 0x00e7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0x00e8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x00ea -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x00ef -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u011f' # 0x00f0 -> LATIN SMALL LETTER G WITH BREVE + u'\xf1' # 0x00f1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0x00f2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0x00f5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x00f7 -> DIVISION SIGN 
+ u'\xf8' # 0x00f8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0x00f9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0x00fb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u0131' # 0x00fd -> LATIN SMALL LETTER DOTLESS I + u'\u015f' # 0x00fe -> LATIN SMALL LETTER S WITH CEDILLA + u'\xff' # 0x00ff -> LATIN SMALL LETTER Y WITH DIAERESIS +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # 
ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 
0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # <control> + 0x0081: 0x0081, # <control> + 0x0082: 0x0082, # <control> + 0x0083: 0x0083, # <control> + 0x0084: 0x0084, # <control> + 0x0085: 0x0085, # <control> + 0x0086: 0x0086, # <control> + 0x0087: 0x0087, # <control> + 0x0088: 0x0088, # <control> + 0x0089: 0x0089, # <control> + 0x008a: 0x008a, # <control> + 0x008b: 0x008b, # <control> + 0x008c: 0x008c, # <control> + 0x008d: 0x008d, # <control> + 0x008e: 0x008e, # <control> + 0x008f: 0x008f, # <control> + 0x0090: 0x0090, # <control> + 0x0091: 0x0091, # <control> + 0x0092: 0x0092, # <control> + 0x0093: 0x0093, # <control> + 0x0094: 0x0094, # <control> + 0x0095: 0x0095, # <control> + 0x0096: 0x0096, # <control> + 0x0097: 0x0097, # <control> + 0x0098: 
0x0098, # <control> + 0x0099: 0x0099, # <control> + 0x009a: 0x009a, # <control> + 0x009b: 0x009b, # <control> + 0x009c: 0x009c, # <control> + 0x009d: 0x009d, # <control> + 0x009e: 0x009e, # <control> + 0x009f: 0x009f, # <control> + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a1: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a5: 0x00a5, # YEN SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00aa: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00af: 0x00af, # MACRON + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b8: 0x00b8, # CEDILLA + 0x00b9: 0x00b9, # SUPERSCRIPT ONE + 0x00ba: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00be: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00bf: 0x00bf, # INVERTED QUESTION MARK + 0x00c0: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00c7: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00ca, # 
LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d1: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00d8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00e6, # LATIN SMALL LETTER AE + 0x00e7: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00f3, 
# LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00f8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x011e: 0x00d0, # LATIN CAPITAL LETTER G WITH BREVE + 0x011f: 0x00f0, # LATIN SMALL LETTER G WITH BREVE + 0x0130: 0x00dd, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0x00fd, # LATIN SMALL LETTER DOTLESS I + 0x015e: 0x00de, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015f: 0x00fe, # LATIN SMALL LETTER S WITH CEDILLA +}
\ No newline at end of file diff --git a/Lib/encodings/koi8_r.py b/Lib/encodings/koi8_r.py index 7494ca6..f1dc3ec 100644 --- a/Lib/encodings/koi8_r.py +++ b/Lib/encodings/koi8_r.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'KOI8-R.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MISC/KOI8-R.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x0081: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x0082: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x0083: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x0084: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x0085: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x0086: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x0087: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x0088: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x0089: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x008a: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x008b: 0x2580, # UPPER HALF BLOCK - 0x008c: 0x2584, # LOWER HALF BLOCK - 0x008d: 0x2588, # FULL BLOCK - 0x008e: 0x258c, # LEFT HALF BLOCK - 0x008f: 0x2590, # RIGHT HALF BLOCK - 0x0090: 0x2591, # LIGHT SHADE - 0x0091: 0x2592, # MEDIUM SHADE - 0x0092: 0x2593, # DARK SHADE - 0x0093: 0x2320, # TOP HALF INTEGRAL - 0x0094: 0x25a0, # BLACK SQUARE - 0x0095: 0x2219, # BULLET OPERATOR - 0x0096: 0x221a, # SQUARE ROOT - 0x0097: 0x2248, # ALMOST EQUAL TO - 0x0098: 0x2264, # LESS-THAN OR EQUAL TO - 
0x0099: 0x2265, # GREATER-THAN OR EQUAL TO - 0x009a: 0x00a0, # NO-BREAK SPACE - 0x009b: 0x2321, # BOTTOM HALF INTEGRAL - 0x009c: 0x00b0, # DEGREE SIGN - 0x009d: 0x00b2, # SUPERSCRIPT TWO - 0x009e: 0x00b7, # MIDDLE DOT - 0x009f: 0x00f7, # DIVISION SIGN - 0x00a0: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00a1: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00a2: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00a3: 0x0451, # CYRILLIC SMALL LETTER IO - 0x00a4: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00a5: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00a6: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00a7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00a8: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00a9: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00aa: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00ab: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00ac: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00ad: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00ae: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00af: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00b0: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00b1: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00b2: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b3: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x00b4: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b5: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00b6: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00b7: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00b8: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00b9: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00ba: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00bb: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00bc: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00bd: 0x256b, # BOX 
DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00be: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00bf: 0x00a9, # COPYRIGHT SIGN - 0x00c0: 0x044e, # CYRILLIC SMALL LETTER YU - 0x00c1: 0x0430, # CYRILLIC SMALL LETTER A - 0x00c2: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00c3: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00c4: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00c5: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00c6: 0x0444, # CYRILLIC SMALL LETTER EF - 0x00c7: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00c8: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00c9: 0x0438, # CYRILLIC SMALL LETTER I - 0x00ca: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00cb: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00cc: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00cd: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00ce: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00cf: 0x043e, # CYRILLIC SMALL LETTER O - 0x00d0: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00d1: 0x044f, # CYRILLIC SMALL LETTER YA - 0x00d2: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00d3: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00d4: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00d5: 0x0443, # CYRILLIC SMALL LETTER U - 0x00d6: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00d7: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00d8: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00d9: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00da: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00db: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00dc: 0x044d, # CYRILLIC SMALL LETTER E - 0x00dd: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00de: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00df: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x00e0: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x00e1: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x00e2: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x00e3: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x00e4: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x00e5: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x00e6: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x00e7: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x00e8: 
0x0425, # CYRILLIC CAPITAL LETTER HA - 0x00e9: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x00ea: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x00eb: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x00ec: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x00ed: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x00ee: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x00ef: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x00f0: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x00f1: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00f2: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x00f3: 0x0421, # CYRILLIC CAPITAL LETTER ES - 0x00f4: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x00f5: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x00f6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x00f7: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x00f8: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x00f9: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x00fa: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x00fb: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x00fc: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x00fd: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x00fe: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x00ff: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x0080: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x0081: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x0082: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x0083: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x0084: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x0085: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x0086: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x0087: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x0088: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x0089: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x008a: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x008b: 0x2580, # UPPER HALF BLOCK + 0x008c: 0x2584, # LOWER HALF BLOCK + 0x008d: 0x2588, # FULL BLOCK + 0x008e: 0x258c, # LEFT HALF BLOCK + 0x008f: 0x2590, # RIGHT HALF BLOCK + 0x0090: 0x2591, # LIGHT SHADE + 0x0091: 0x2592, # MEDIUM SHADE + 0x0092: 
0x2593, # DARK SHADE + 0x0093: 0x2320, # TOP HALF INTEGRAL + 0x0094: 0x25a0, # BLACK SQUARE + 0x0095: 0x2219, # BULLET OPERATOR + 0x0096: 0x221a, # SQUARE ROOT + 0x0097: 0x2248, # ALMOST EQUAL TO + 0x0098: 0x2264, # LESS-THAN OR EQUAL TO + 0x0099: 0x2265, # GREATER-THAN OR EQUAL TO + 0x009a: 0x00a0, # NO-BREAK SPACE + 0x009b: 0x2321, # BOTTOM HALF INTEGRAL + 0x009c: 0x00b0, # DEGREE SIGN + 0x009d: 0x00b2, # SUPERSCRIPT TWO + 0x009e: 0x00b7, # MIDDLE DOT + 0x009f: 0x00f7, # DIVISION SIGN + 0x00a0: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00a1: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00a2: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00a3: 0x0451, # CYRILLIC SMALL LETTER IO + 0x00a4: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00a5: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00a6: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00a7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00a8: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00a9: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00aa: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00ab: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00ac: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00ad: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00ae: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00af: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00b0: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00b1: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00b2: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b3: 0x0401, # CYRILLIC CAPITAL LETTER IO + 0x00b4: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b5: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00b6: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00b7: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00b8: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00b9: 0x2567, # BOX DRAWINGS UP SINGLE 
AND HORIZONTAL DOUBLE + 0x00ba: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00bb: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00bc: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00bd: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00be: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00bf: 0x00a9, # COPYRIGHT SIGN + 0x00c0: 0x044e, # CYRILLIC SMALL LETTER YU + 0x00c1: 0x0430, # CYRILLIC SMALL LETTER A + 0x00c2: 0x0431, # CYRILLIC SMALL LETTER BE + 0x00c3: 0x0446, # CYRILLIC SMALL LETTER TSE + 0x00c4: 0x0434, # CYRILLIC SMALL LETTER DE + 0x00c5: 0x0435, # CYRILLIC SMALL LETTER IE + 0x00c6: 0x0444, # CYRILLIC SMALL LETTER EF + 0x00c7: 0x0433, # CYRILLIC SMALL LETTER GHE + 0x00c8: 0x0445, # CYRILLIC SMALL LETTER HA + 0x00c9: 0x0438, # CYRILLIC SMALL LETTER I + 0x00ca: 0x0439, # CYRILLIC SMALL LETTER SHORT I + 0x00cb: 0x043a, # CYRILLIC SMALL LETTER KA + 0x00cc: 0x043b, # CYRILLIC SMALL LETTER EL + 0x00cd: 0x043c, # CYRILLIC SMALL LETTER EM + 0x00ce: 0x043d, # CYRILLIC SMALL LETTER EN + 0x00cf: 0x043e, # CYRILLIC SMALL LETTER O + 0x00d0: 0x043f, # CYRILLIC SMALL LETTER PE + 0x00d1: 0x044f, # CYRILLIC SMALL LETTER YA + 0x00d2: 0x0440, # CYRILLIC SMALL LETTER ER + 0x00d3: 0x0441, # CYRILLIC SMALL LETTER ES + 0x00d4: 0x0442, # CYRILLIC SMALL LETTER TE + 0x00d5: 0x0443, # CYRILLIC SMALL LETTER U + 0x00d6: 0x0436, # CYRILLIC SMALL LETTER ZHE + 0x00d7: 0x0432, # CYRILLIC SMALL LETTER VE + 0x00d8: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN + 0x00d9: 0x044b, # CYRILLIC SMALL LETTER YERU + 0x00da: 0x0437, # CYRILLIC SMALL LETTER ZE + 0x00db: 0x0448, # CYRILLIC SMALL LETTER SHA + 0x00dc: 0x044d, # CYRILLIC SMALL LETTER E + 0x00dd: 0x0449, # CYRILLIC SMALL LETTER SHCHA + 0x00de: 0x0447, # CYRILLIC SMALL LETTER CHE + 0x00df: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN + 0x00e0: 0x042e, # CYRILLIC CAPITAL LETTER YU + 0x00e1: 0x0410, # CYRILLIC CAPITAL LETTER A + 0x00e2: 0x0411, # CYRILLIC CAPITAL LETTER BE + 0x00e3: 
0x0426, # CYRILLIC CAPITAL LETTER TSE + 0x00e4: 0x0414, # CYRILLIC CAPITAL LETTER DE + 0x00e5: 0x0415, # CYRILLIC CAPITAL LETTER IE + 0x00e6: 0x0424, # CYRILLIC CAPITAL LETTER EF + 0x00e7: 0x0413, # CYRILLIC CAPITAL LETTER GHE + 0x00e8: 0x0425, # CYRILLIC CAPITAL LETTER HA + 0x00e9: 0x0418, # CYRILLIC CAPITAL LETTER I + 0x00ea: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I + 0x00eb: 0x041a, # CYRILLIC CAPITAL LETTER KA + 0x00ec: 0x041b, # CYRILLIC CAPITAL LETTER EL + 0x00ed: 0x041c, # CYRILLIC CAPITAL LETTER EM + 0x00ee: 0x041d, # CYRILLIC CAPITAL LETTER EN + 0x00ef: 0x041e, # CYRILLIC CAPITAL LETTER O + 0x00f0: 0x041f, # CYRILLIC CAPITAL LETTER PE + 0x00f1: 0x042f, # CYRILLIC CAPITAL LETTER YA + 0x00f2: 0x0420, # CYRILLIC CAPITAL LETTER ER + 0x00f3: 0x0421, # CYRILLIC CAPITAL LETTER ES + 0x00f4: 0x0422, # CYRILLIC CAPITAL LETTER TE + 0x00f5: 0x0423, # CYRILLIC CAPITAL LETTER U + 0x00f6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE + 0x00f7: 0x0412, # CYRILLIC CAPITAL LETTER VE + 0x00f8: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x00f9: 0x042b, # CYRILLIC CAPITAL LETTER YERU + 0x00fa: 0x0417, # CYRILLIC CAPITAL LETTER ZE + 0x00fb: 0x0428, # CYRILLIC CAPITAL LETTER SHA + 0x00fc: 0x042d, # CYRILLIC CAPITAL LETTER E + 0x00fd: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA + 0x00fe: 0x0427, # CYRILLIC CAPITAL LETTER CHE + 0x00ff: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA 
LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u2500' # 0x0080 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u2502' # 0x0081 -> BOX DRAWINGS LIGHT VERTICAL + u'\u250c' # 0x0082 -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2510' # 0x0083 -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x0084 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2518' # 0x0085 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u251c' # 0x0086 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2524' # 0x0087 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u252c' # 0x0088 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u2534' # 0x0089 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u253c' # 0x008a -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u2580' # 0x008b -> UPPER HALF BLOCK + u'\u2584' # 0x008c -> LOWER HALF BLOCK + u'\u2588' # 0x008d -> FULL BLOCK + u'\u258c' # 0x008e -> LEFT HALF BLOCK + u'\u2590' # 0x008f -> RIGHT HALF BLOCK + u'\u2591' # 0x0090 -> LIGHT SHADE + u'\u2592' # 0x0091 -> MEDIUM SHADE + u'\u2593' # 0x0092 -> DARK SHADE + u'\u2320' # 0x0093 -> TOP HALF INTEGRAL + u'\u25a0' # 0x0094 -> BLACK SQUARE + u'\u2219' # 0x0095 -> BULLET OPERATOR + u'\u221a' # 0x0096 -> SQUARE ROOT + u'\u2248' # 0x0097 -> ALMOST EQUAL TO + u'\u2264' # 0x0098 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0x0099 -> GREATER-THAN OR EQUAL TO + u'\xa0' # 0x009a -> NO-BREAK SPACE + u'\u2321' # 0x009b -> BOTTOM HALF INTEGRAL + u'\xb0' # 0x009c -> DEGREE SIGN + u'\xb2' # 0x009d -> SUPERSCRIPT TWO + u'\xb7' # 0x009e -> MIDDLE 
DOT + u'\xf7' # 0x009f -> DIVISION SIGN + u'\u2550' # 0x00a0 -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u2551' # 0x00a1 -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2552' # 0x00a2 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u0451' # 0x00a3 -> CYRILLIC SMALL LETTER IO + u'\u2553' # 0x00a4 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u2554' # 0x00a5 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2555' # 0x00a6 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2556' # 0x00a7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2557' # 0x00a8 -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u2558' # 0x00a9 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2559' # 0x00aa -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00ab -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u255b' # 0x00ac -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u255c' # 0x00ad -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255d' # 0x00ae -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255e' # 0x00af -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00b0 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u2560' # 0x00b1 -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2561' # 0x00b2 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u0401' # 0x00b3 -> CYRILLIC CAPITAL LETTER IO + u'\u2562' # 0x00b4 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2563' # 0x00b5 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2564' # 0x00b6 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2566' # 0x00b8 -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2567' # 0x00b9 -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00ba -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2569' # 0x00bb -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u256a' # 0x00bc -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u256b' # 0x00bd -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256c' # 0x00be -> BOX DRAWINGS DOUBLE 
VERTICAL AND HORIZONTAL + u'\xa9' # 0x00bf -> COPYRIGHT SIGN + u'\u044e' # 0x00c0 -> CYRILLIC SMALL LETTER YU + u'\u0430' # 0x00c1 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0x00c2 -> CYRILLIC SMALL LETTER BE + u'\u0446' # 0x00c3 -> CYRILLIC SMALL LETTER TSE + u'\u0434' # 0x00c4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0x00c5 -> CYRILLIC SMALL LETTER IE + u'\u0444' # 0x00c6 -> CYRILLIC SMALL LETTER EF + u'\u0433' # 0x00c7 -> CYRILLIC SMALL LETTER GHE + u'\u0445' # 0x00c8 -> CYRILLIC SMALL LETTER HA + u'\u0438' # 0x00c9 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0x00ca -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0x00cb -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0x00cc -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0x00cd -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0x00ce -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0x00cf -> CYRILLIC SMALL LETTER O + u'\u043f' # 0x00d0 -> CYRILLIC SMALL LETTER PE + u'\u044f' # 0x00d1 -> CYRILLIC SMALL LETTER YA + u'\u0440' # 0x00d2 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0x00d3 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0x00d4 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0x00d5 -> CYRILLIC SMALL LETTER U + u'\u0436' # 0x00d6 -> CYRILLIC SMALL LETTER ZHE + u'\u0432' # 0x00d7 -> CYRILLIC SMALL LETTER VE + u'\u044c' # 0x00d8 -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044b' # 0x00d9 -> CYRILLIC SMALL LETTER YERU + u'\u0437' # 0x00da -> CYRILLIC SMALL LETTER ZE + u'\u0448' # 0x00db -> CYRILLIC SMALL LETTER SHA + u'\u044d' # 0x00dc -> CYRILLIC SMALL LETTER E + u'\u0449' # 0x00dd -> CYRILLIC SMALL LETTER SHCHA + u'\u0447' # 0x00de -> CYRILLIC SMALL LETTER CHE + u'\u044a' # 0x00df -> CYRILLIC SMALL LETTER HARD SIGN + u'\u042e' # 0x00e0 -> CYRILLIC CAPITAL LETTER YU + u'\u0410' # 0x00e1 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0x00e2 -> CYRILLIC CAPITAL LETTER BE + u'\u0426' # 0x00e3 -> CYRILLIC CAPITAL LETTER TSE + u'\u0414' # 0x00e4 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0x00e5 -> CYRILLIC CAPITAL LETTER IE + u'\u0424' # 0x00e6 -> CYRILLIC CAPITAL 
LETTER EF + u'\u0413' # 0x00e7 -> CYRILLIC CAPITAL LETTER GHE + u'\u0425' # 0x00e8 -> CYRILLIC CAPITAL LETTER HA + u'\u0418' # 0x00e9 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0x00ea -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0x00eb -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0x00ec -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0x00ed -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0x00ee -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0x00ef -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0x00f0 -> CYRILLIC CAPITAL LETTER PE + u'\u042f' # 0x00f1 -> CYRILLIC CAPITAL LETTER YA + u'\u0420' # 0x00f2 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0x00f3 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0x00f4 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0x00f5 -> CYRILLIC CAPITAL LETTER U + u'\u0416' # 0x00f6 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0412' # 0x00f7 -> CYRILLIC CAPITAL LETTER VE + u'\u042c' # 0x00f8 -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042b' # 0x00f9 -> CYRILLIC CAPITAL LETTER YERU + u'\u0417' # 0x00fa -> CYRILLIC CAPITAL LETTER ZE + u'\u0428' # 0x00fb -> CYRILLIC CAPITAL LETTER SHA + u'\u042d' # 0x00fc -> CYRILLIC CAPITAL LETTER E + u'\u0429' # 0x00fd -> CYRILLIC CAPITAL LETTER SHCHA + u'\u0427' # 0x00fe -> CYRILLIC CAPITAL LETTER CHE + u'\u042a' # 0x00ff -> CYRILLIC CAPITAL LETTER HARD SIGN +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE 
+ 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, 
# LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 
0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x009a, # NO-BREAK SPACE + 0x00a9: 0x00bf, # COPYRIGHT SIGN + 0x00b0: 0x009c, # DEGREE SIGN + 0x00b2: 0x009d, # SUPERSCRIPT TWO + 0x00b7: 0x009e, # MIDDLE DOT + 0x00f7: 0x009f, # DIVISION SIGN + 0x0401: 0x00b3, # CYRILLIC CAPITAL LETTER IO + 0x0410: 0x00e1, # CYRILLIC CAPITAL LETTER A + 0x0411: 0x00e2, # CYRILLIC CAPITAL LETTER BE + 0x0412: 0x00f7, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0x00e7, # CYRILLIC CAPITAL LETTER GHE + 0x0414: 0x00e4, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0x00e5, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0x00f6, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0x00fa, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0x00e9, # CYRILLIC CAPITAL LETTER I + 0x0419: 0x00ea, # CYRILLIC CAPITAL LETTER SHORT I + 0x041a: 0x00eb, # CYRILLIC CAPITAL LETTER KA + 0x041b: 0x00ec, # CYRILLIC CAPITAL LETTER EL + 0x041c: 0x00ed, # CYRILLIC CAPITAL LETTER EM + 0x041d: 0x00ee, # CYRILLIC CAPITAL LETTER EN + 0x041e: 0x00ef, # CYRILLIC CAPITAL LETTER O + 0x041f: 0x00f0, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0x00f2, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0x00f3, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0x00f4, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0x00f5, # CYRILLIC CAPITAL LETTER U + 0x0424: 0x00e6, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0x00e8, # CYRILLIC CAPITAL LETTER HA + 0x0426: 0x00e3, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0x00fe, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0x00fb, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0x00fd, # CYRILLIC CAPITAL LETTER SHCHA + 0x042a: 0x00ff, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042b: 0x00f9, # CYRILLIC CAPITAL LETTER YERU + 0x042c: 0x00f8, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042d: 0x00fc, # CYRILLIC CAPITAL LETTER E + 0x042e: 0x00e0, # CYRILLIC CAPITAL LETTER YU + 0x042f: 0x00f1, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0x00c1, # CYRILLIC SMALL LETTER A + 0x0431: 0x00c2, # CYRILLIC SMALL LETTER BE + 
0x0432: 0x00d7, # CYRILLIC SMALL LETTER VE + 0x0433: 0x00c7, # CYRILLIC SMALL LETTER GHE + 0x0434: 0x00c4, # CYRILLIC SMALL LETTER DE + 0x0435: 0x00c5, # CYRILLIC SMALL LETTER IE + 0x0436: 0x00d6, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0x00da, # CYRILLIC SMALL LETTER ZE + 0x0438: 0x00c9, # CYRILLIC SMALL LETTER I + 0x0439: 0x00ca, # CYRILLIC SMALL LETTER SHORT I + 0x043a: 0x00cb, # CYRILLIC SMALL LETTER KA + 0x043b: 0x00cc, # CYRILLIC SMALL LETTER EL + 0x043c: 0x00cd, # CYRILLIC SMALL LETTER EM + 0x043d: 0x00ce, # CYRILLIC SMALL LETTER EN + 0x043e: 0x00cf, # CYRILLIC SMALL LETTER O + 0x043f: 0x00d0, # CYRILLIC SMALL LETTER PE + 0x0440: 0x00d2, # CYRILLIC SMALL LETTER ER + 0x0441: 0x00d3, # CYRILLIC SMALL LETTER ES + 0x0442: 0x00d4, # CYRILLIC SMALL LETTER TE + 0x0443: 0x00d5, # CYRILLIC SMALL LETTER U + 0x0444: 0x00c6, # CYRILLIC SMALL LETTER EF + 0x0445: 0x00c8, # CYRILLIC SMALL LETTER HA + 0x0446: 0x00c3, # CYRILLIC SMALL LETTER TSE + 0x0447: 0x00de, # CYRILLIC SMALL LETTER CHE + 0x0448: 0x00db, # CYRILLIC SMALL LETTER SHA + 0x0449: 0x00dd, # CYRILLIC SMALL LETTER SHCHA + 0x044a: 0x00df, # CYRILLIC SMALL LETTER HARD SIGN + 0x044b: 0x00d9, # CYRILLIC SMALL LETTER YERU + 0x044c: 0x00d8, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044d: 0x00dc, # CYRILLIC SMALL LETTER E + 0x044e: 0x00c0, # CYRILLIC SMALL LETTER YU + 0x044f: 0x00d1, # CYRILLIC SMALL LETTER YA + 0x0451: 0x00a3, # CYRILLIC SMALL LETTER IO + 0x2219: 0x0095, # BULLET OPERATOR + 0x221a: 0x0096, # SQUARE ROOT + 0x2248: 0x0097, # ALMOST EQUAL TO + 0x2264: 0x0098, # LESS-THAN OR EQUAL TO + 0x2265: 0x0099, # GREATER-THAN OR EQUAL TO + 0x2320: 0x0093, # TOP HALF INTEGRAL + 0x2321: 0x009b, # BOTTOM HALF INTEGRAL + 0x2500: 0x0080, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x0081, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x0082, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x0083, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x0084, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x0085, # BOX DRAWINGS LIGHT UP AND LEFT + 
0x251c: 0x0086, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x0087, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x0088, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x0089, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x008a, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00a0, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00a1, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00a2, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00a4, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00a5, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00a6, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00a7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00a8, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00a9, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00aa, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00ab, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00ac, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00ad, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00ae, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00af, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00b0, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00b1, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b2, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b4, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b5, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00b6, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00b8, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00b9, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00ba, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00bb, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00bc, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00bd, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 
0x256c: 0x00be, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x008b, # UPPER HALF BLOCK + 0x2584: 0x008c, # LOWER HALF BLOCK + 0x2588: 0x008d, # FULL BLOCK + 0x258c: 0x008e, # LEFT HALF BLOCK + 0x2590: 0x008f, # RIGHT HALF BLOCK + 0x2591: 0x0090, # LIGHT SHADE + 0x2592: 0x0091, # MEDIUM SHADE + 0x2593: 0x0092, # DARK SHADE + 0x25a0: 0x0094, # BLACK SQUARE +}
\ No newline at end of file diff --git a/Lib/encodings/mac_cyrillic.py b/Lib/encodings/mac_cyrillic.py index 922523b..7dc8e41 100644 --- a/Lib/encodings/mac_cyrillic.py +++ b/Lib/encodings/mac_cyrillic.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CYRILLIC.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/APPLE/CYRILLIC.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,131 +32,650 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES - 0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x0096: 0x0426, # CYRILLIC 
CAPITAL LETTER TSE - 0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00a0: 0x2020, # DAGGER - 0x00a1: 0x00b0, # DEGREE SIGN - 0x00a4: 0x00a7, # SECTION SIGN - 0x00a5: 0x2022, # BULLET - 0x00a6: 0x00b6, # PILCROW SIGN - 0x00a7: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x00a8: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x2122, # TRADE MARK SIGN - 0x00ab: 0x0402, # CYRILLIC CAPITAL LETTER DJE - 0x00ac: 0x0452, # CYRILLIC SMALL LETTER DJE - 0x00ad: 0x2260, # NOT EQUAL TO - 0x00ae: 0x0403, # CYRILLIC CAPITAL LETTER GJE - 0x00af: 0x0453, # CYRILLIC SMALL LETTER GJE - 0x00b0: 0x221e, # INFINITY - 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO - 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00b4: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL - 0x00b7: 0x0408, # CYRILLIC CAPITAL LETTER JE - 0x00b8: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x00b9: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x00ba: 0x0407, # CYRILLIC CAPITAL LETTER YI - 0x00bb: 0x0457, # CYRILLIC SMALL LETTER YI - 0x00bc: 0x0409, # CYRILLIC CAPITAL LETTER LJE - 0x00bd: 0x0459, # CYRILLIC SMALL LETTER LJE - 0x00be: 0x040a, # CYRILLIC CAPITAL LETTER NJE - 0x00bf: 0x045a, # CYRILLIC SMALL LETTER NJE - 0x00c0: 0x0458, # CYRILLIC SMALL LETTER JE - 0x00c1: 0x0405, # CYRILLIC CAPITAL LETTER DZE - 0x00c2: 0x00ac, # NOT SIGN - 0x00c3: 0x221a, # SQUARE ROOT - 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00c5: 0x2248, # ALMOST EQUAL TO - 0x00c6: 0x2206, # INCREMENT - 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c8: 0x00bb, # 
RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS - 0x00ca: 0x00a0, # NO-BREAK SPACE - 0x00cb: 0x040b, # CYRILLIC CAPITAL LETTER TSHE - 0x00cc: 0x045b, # CYRILLIC SMALL LETTER TSHE - 0x00cd: 0x040c, # CYRILLIC CAPITAL LETTER KJE - 0x00ce: 0x045c, # CYRILLIC SMALL LETTER KJE - 0x00cf: 0x0455, # CYRILLIC SMALL LETTER DZE - 0x00d0: 0x2013, # EN DASH - 0x00d1: 0x2014, # EM DASH - 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00d6: 0x00f7, # DIVISION SIGN - 0x00d7: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00d8: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U - 0x00d9: 0x045e, # CYRILLIC SMALL LETTER SHORT U - 0x00da: 0x040f, # CYRILLIC CAPITAL LETTER DZHE - 0x00db: 0x045f, # CYRILLIC SMALL LETTER DZHE - 0x00dc: 0x2116, # NUMERO SIGN - 0x00dd: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x00de: 0x0451, # CYRILLIC SMALL LETTER IO - 0x00df: 0x044f, # CYRILLIC SMALL LETTER YA - 0x00e0: 0x0430, # CYRILLIC SMALL LETTER A - 0x00e1: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00e2: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00e3: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00e4: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00e5: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00e6: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00e7: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00e8: 0x0438, # CYRILLIC SMALL LETTER I - 0x00e9: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00ea: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00eb: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00ec: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00ed: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00ee: 0x043e, # CYRILLIC SMALL LETTER O - 0x00ef: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00f0: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00f1: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00f2: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00f3: 0x0443, # CYRILLIC SMALL LETTER U - 0x00f4: 0x0444, # CYRILLIC SMALL LETTER EF - 
0x00f5: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00f6: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00f7: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00f8: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00fa: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x00fb: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00fc: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00fd: 0x044d, # CYRILLIC SMALL LETTER E - 0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU - 0x00ff: 0x00a4, # CURRENCY SIGN + 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A + 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE + 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE + 0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE + 0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE + 0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE + 0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE + 0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE + 0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I + 0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I + 0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA + 0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL + 0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM + 0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN + 0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O + 0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE + 0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER + 0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES + 0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE + 0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U + 0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF + 0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA + 0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE + 0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE + 0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA + 0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA + 0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU + 0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E + 0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU + 0x009f: 
0x042f, # CYRILLIC CAPITAL LETTER YA + 0x00a0: 0x2020, # DAGGER + 0x00a1: 0x00b0, # DEGREE SIGN + 0x00a2: 0x0490, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN + 0x00a4: 0x00a7, # SECTION SIGN + 0x00a5: 0x2022, # BULLET + 0x00a6: 0x00b6, # PILCROW SIGN + 0x00a7: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x00a8: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x2122, # TRADE MARK SIGN + 0x00ab: 0x0402, # CYRILLIC CAPITAL LETTER DJE + 0x00ac: 0x0452, # CYRILLIC SMALL LETTER DJE + 0x00ad: 0x2260, # NOT EQUAL TO + 0x00ae: 0x0403, # CYRILLIC CAPITAL LETTER GJE + 0x00af: 0x0453, # CYRILLIC SMALL LETTER GJE + 0x00b0: 0x221e, # INFINITY + 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO + 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00b4: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x00b6: 0x0491, # CYRILLIC SMALL LETTER GHE WITH UPTURN + 0x00b7: 0x0408, # CYRILLIC CAPITAL LETTER JE + 0x00b8: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x00b9: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x00ba: 0x0407, # CYRILLIC CAPITAL LETTER YI + 0x00bb: 0x0457, # CYRILLIC SMALL LETTER YI + 0x00bc: 0x0409, # CYRILLIC CAPITAL LETTER LJE + 0x00bd: 0x0459, # CYRILLIC SMALL LETTER LJE + 0x00be: 0x040a, # CYRILLIC CAPITAL LETTER NJE + 0x00bf: 0x045a, # CYRILLIC SMALL LETTER NJE + 0x00c0: 0x0458, # CYRILLIC SMALL LETTER JE + 0x00c1: 0x0405, # CYRILLIC CAPITAL LETTER DZE + 0x00c2: 0x00ac, # NOT SIGN + 0x00c3: 0x221a, # SQUARE ROOT + 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00c5: 0x2248, # ALMOST EQUAL TO + 0x00c6: 0x2206, # INCREMENT + 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS + 0x00ca: 0x00a0, # NO-BREAK SPACE + 0x00cb: 0x040b, # CYRILLIC CAPITAL LETTER TSHE + 0x00cc: 0x045b, # CYRILLIC SMALL LETTER TSHE + 0x00cd: 0x040c, # CYRILLIC CAPITAL LETTER KJE + 0x00ce: 0x045c, # CYRILLIC SMALL LETTER KJE + 0x00cf: 0x0455, # CYRILLIC SMALL LETTER DZE + 
0x00d0: 0x2013, # EN DASH + 0x00d1: 0x2014, # EM DASH + 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00d6: 0x00f7, # DIVISION SIGN + 0x00d7: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x00d8: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U + 0x00d9: 0x045e, # CYRILLIC SMALL LETTER SHORT U + 0x00da: 0x040f, # CYRILLIC CAPITAL LETTER DZHE + 0x00db: 0x045f, # CYRILLIC SMALL LETTER DZHE + 0x00dc: 0x2116, # NUMERO SIGN + 0x00dd: 0x0401, # CYRILLIC CAPITAL LETTER IO + 0x00de: 0x0451, # CYRILLIC SMALL LETTER IO + 0x00df: 0x044f, # CYRILLIC SMALL LETTER YA + 0x00e0: 0x0430, # CYRILLIC SMALL LETTER A + 0x00e1: 0x0431, # CYRILLIC SMALL LETTER BE + 0x00e2: 0x0432, # CYRILLIC SMALL LETTER VE + 0x00e3: 0x0433, # CYRILLIC SMALL LETTER GHE + 0x00e4: 0x0434, # CYRILLIC SMALL LETTER DE + 0x00e5: 0x0435, # CYRILLIC SMALL LETTER IE + 0x00e6: 0x0436, # CYRILLIC SMALL LETTER ZHE + 0x00e7: 0x0437, # CYRILLIC SMALL LETTER ZE + 0x00e8: 0x0438, # CYRILLIC SMALL LETTER I + 0x00e9: 0x0439, # CYRILLIC SMALL LETTER SHORT I + 0x00ea: 0x043a, # CYRILLIC SMALL LETTER KA + 0x00eb: 0x043b, # CYRILLIC SMALL LETTER EL + 0x00ec: 0x043c, # CYRILLIC SMALL LETTER EM + 0x00ed: 0x043d, # CYRILLIC SMALL LETTER EN + 0x00ee: 0x043e, # CYRILLIC SMALL LETTER O + 0x00ef: 0x043f, # CYRILLIC SMALL LETTER PE + 0x00f0: 0x0440, # CYRILLIC SMALL LETTER ER + 0x00f1: 0x0441, # CYRILLIC SMALL LETTER ES + 0x00f2: 0x0442, # CYRILLIC SMALL LETTER TE + 0x00f3: 0x0443, # CYRILLIC SMALL LETTER U + 0x00f4: 0x0444, # CYRILLIC SMALL LETTER EF + 0x00f5: 0x0445, # CYRILLIC SMALL LETTER HA + 0x00f6: 0x0446, # CYRILLIC SMALL LETTER TSE + 0x00f7: 0x0447, # CYRILLIC SMALL LETTER CHE + 0x00f8: 0x0448, # CYRILLIC SMALL LETTER SHA + 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA + 0x00fa: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN + 0x00fb: 0x044b, # CYRILLIC SMALL LETTER YERU + 0x00fc: 0x044c, # CYRILLIC 
SMALL LETTER SOFT SIGN + 0x00fd: 0x044d, # CYRILLIC SMALL LETTER E + 0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU + 0x00ff: 0x20ac, # EURO SIGN }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> CONTROL CHARACTER + u'\x01' # 0x0001 -> CONTROL CHARACTER + u'\x02' # 0x0002 -> CONTROL CHARACTER + u'\x03' # 0x0003 -> CONTROL CHARACTER + u'\x04' # 0x0004 -> CONTROL CHARACTER + u'\x05' # 0x0005 -> CONTROL CHARACTER + u'\x06' # 0x0006 -> CONTROL CHARACTER + u'\x07' # 0x0007 -> CONTROL CHARACTER + u'\x08' # 0x0008 -> CONTROL CHARACTER + u'\t' # 0x0009 -> CONTROL CHARACTER + u'\n' # 0x000a -> CONTROL CHARACTER + u'\x0b' # 0x000b -> CONTROL CHARACTER + u'\x0c' # 0x000c -> CONTROL CHARACTER + u'\r' # 0x000d -> CONTROL CHARACTER + u'\x0e' # 0x000e -> CONTROL CHARACTER + u'\x0f' # 0x000f -> CONTROL CHARACTER + u'\x10' # 0x0010 -> CONTROL CHARACTER + u'\x11' # 0x0011 -> CONTROL CHARACTER + u'\x12' # 0x0012 -> CONTROL CHARACTER + u'\x13' # 0x0013 -> CONTROL CHARACTER + u'\x14' # 0x0014 -> CONTROL CHARACTER + u'\x15' # 0x0015 -> CONTROL CHARACTER + u'\x16' # 0x0016 -> CONTROL CHARACTER + u'\x17' # 0x0017 -> CONTROL CHARACTER + u'\x18' # 0x0018 -> CONTROL CHARACTER + u'\x19' # 0x0019 -> CONTROL CHARACTER + u'\x1a' # 0x001a -> CONTROL CHARACTER + u'\x1b' # 0x001b -> CONTROL CHARACTER + u'\x1c' # 0x001c -> CONTROL CHARACTER + u'\x1d' # 0x001d -> CONTROL CHARACTER + u'\x1e' # 0x001e -> CONTROL CHARACTER + u'\x1f' # 0x001f -> CONTROL CHARACTER + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> CONTROL CHARACTER + u'\u0410' # 0x0080 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0x0081 -> CYRILLIC CAPITAL LETTER BE + u'\u0412' # 0x0082 -> CYRILLIC CAPITAL LETTER VE + u'\u0413' # 0x0083 -> CYRILLIC CAPITAL LETTER GHE + u'\u0414' # 0x0084 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0x0085 -> CYRILLIC CAPITAL LETTER IE + u'\u0416' # 0x0086 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0417' # 0x0087 -> CYRILLIC CAPITAL LETTER ZE + u'\u0418' # 0x0088 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0x0089 -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0x008a -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0x008b -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0x008c -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0x008d -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0x008e -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0x008f -> CYRILLIC CAPITAL LETTER PE + u'\u0420' # 0x0090 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0x0091 -> CYRILLIC 
CAPITAL LETTER ES + u'\u0422' # 0x0092 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0x0093 -> CYRILLIC CAPITAL LETTER U + u'\u0424' # 0x0094 -> CYRILLIC CAPITAL LETTER EF + u'\u0425' # 0x0095 -> CYRILLIC CAPITAL LETTER HA + u'\u0426' # 0x0096 -> CYRILLIC CAPITAL LETTER TSE + u'\u0427' # 0x0097 -> CYRILLIC CAPITAL LETTER CHE + u'\u0428' # 0x0098 -> CYRILLIC CAPITAL LETTER SHA + u'\u0429' # 0x0099 -> CYRILLIC CAPITAL LETTER SHCHA + u'\u042a' # 0x009a -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\u042b' # 0x009b -> CYRILLIC CAPITAL LETTER YERU + u'\u042c' # 0x009c -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042d' # 0x009d -> CYRILLIC CAPITAL LETTER E + u'\u042e' # 0x009e -> CYRILLIC CAPITAL LETTER YU + u'\u042f' # 0x009f -> CYRILLIC CAPITAL LETTER YA + u'\u2020' # 0x00a0 -> DAGGER + u'\xb0' # 0x00a1 -> DEGREE SIGN + u'\u0490' # 0x00a2 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa7' # 0x00a4 -> SECTION SIGN + u'\u2022' # 0x00a5 -> BULLET + u'\xb6' # 0x00a6 -> PILCROW SIGN + u'\u0406' # 0x00a7 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + u'\xae' # 0x00a8 -> REGISTERED SIGN + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u2122' # 0x00aa -> TRADE MARK SIGN + u'\u0402' # 0x00ab -> CYRILLIC CAPITAL LETTER DJE + u'\u0452' # 0x00ac -> CYRILLIC SMALL LETTER DJE + u'\u2260' # 0x00ad -> NOT EQUAL TO + u'\u0403' # 0x00ae -> CYRILLIC CAPITAL LETTER GJE + u'\u0453' # 0x00af -> CYRILLIC SMALL LETTER GJE + u'\u221e' # 0x00b0 -> INFINITY + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\u2264' # 0x00b2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0x00b3 -> GREATER-THAN OR EQUAL TO + u'\u0456' # 0x00b4 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\u0491' # 0x00b6 -> CYRILLIC SMALL LETTER GHE WITH UPTURN + u'\u0408' # 0x00b7 -> CYRILLIC CAPITAL LETTER JE + u'\u0404' # 0x00b8 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\u0454' # 0x00b9 -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\u0407' # 0x00ba -> CYRILLIC CAPITAL LETTER YI + 
u'\u0457' # 0x00bb -> CYRILLIC SMALL LETTER YI + u'\u0409' # 0x00bc -> CYRILLIC CAPITAL LETTER LJE + u'\u0459' # 0x00bd -> CYRILLIC SMALL LETTER LJE + u'\u040a' # 0x00be -> CYRILLIC CAPITAL LETTER NJE + u'\u045a' # 0x00bf -> CYRILLIC SMALL LETTER NJE + u'\u0458' # 0x00c0 -> CYRILLIC SMALL LETTER JE + u'\u0405' # 0x00c1 -> CYRILLIC CAPITAL LETTER DZE + u'\xac' # 0x00c2 -> NOT SIGN + u'\u221a' # 0x00c3 -> SQUARE ROOT + u'\u0192' # 0x00c4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0x00c5 -> ALMOST EQUAL TO + u'\u2206' # 0x00c6 -> INCREMENT + u'\xab' # 0x00c7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00c8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0x00c9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0x00ca -> NO-BREAK SPACE + u'\u040b' # 0x00cb -> CYRILLIC CAPITAL LETTER TSHE + u'\u045b' # 0x00cc -> CYRILLIC SMALL LETTER TSHE + u'\u040c' # 0x00cd -> CYRILLIC CAPITAL LETTER KJE + u'\u045c' # 0x00ce -> CYRILLIC SMALL LETTER KJE + u'\u0455' # 0x00cf -> CYRILLIC SMALL LETTER DZE + u'\u2013' # 0x00d0 -> EN DASH + u'\u2014' # 0x00d1 -> EM DASH + u'\u201c' # 0x00d2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x00d3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0x00d4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x00d5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0x00d6 -> DIVISION SIGN + u'\u201e' # 0x00d7 -> DOUBLE LOW-9 QUOTATION MARK + u'\u040e' # 0x00d8 -> CYRILLIC CAPITAL LETTER SHORT U + u'\u045e' # 0x00d9 -> CYRILLIC SMALL LETTER SHORT U + u'\u040f' # 0x00da -> CYRILLIC CAPITAL LETTER DZHE + u'\u045f' # 0x00db -> CYRILLIC SMALL LETTER DZHE + u'\u2116' # 0x00dc -> NUMERO SIGN + u'\u0401' # 0x00dd -> CYRILLIC CAPITAL LETTER IO + u'\u0451' # 0x00de -> CYRILLIC SMALL LETTER IO + u'\u044f' # 0x00df -> CYRILLIC SMALL LETTER YA + u'\u0430' # 0x00e0 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0x00e1 -> CYRILLIC SMALL LETTER BE + u'\u0432' # 0x00e2 -> CYRILLIC SMALL LETTER VE + u'\u0433' # 0x00e3 -> CYRILLIC SMALL LETTER GHE + u'\u0434' # 0x00e4 -> CYRILLIC 
SMALL LETTER DE + u'\u0435' # 0x00e5 -> CYRILLIC SMALL LETTER IE + u'\u0436' # 0x00e6 -> CYRILLIC SMALL LETTER ZHE + u'\u0437' # 0x00e7 -> CYRILLIC SMALL LETTER ZE + u'\u0438' # 0x00e8 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0x00e9 -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0x00ea -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0x00eb -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0x00ec -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0x00ed -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0x00ee -> CYRILLIC SMALL LETTER O + u'\u043f' # 0x00ef -> CYRILLIC SMALL LETTER PE + u'\u0440' # 0x00f0 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0x00f1 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0x00f2 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0x00f3 -> CYRILLIC SMALL LETTER U + u'\u0444' # 0x00f4 -> CYRILLIC SMALL LETTER EF + u'\u0445' # 0x00f5 -> CYRILLIC SMALL LETTER HA + u'\u0446' # 0x00f6 -> CYRILLIC SMALL LETTER TSE + u'\u0447' # 0x00f7 -> CYRILLIC SMALL LETTER CHE + u'\u0448' # 0x00f8 -> CYRILLIC SMALL LETTER SHA + u'\u0449' # 0x00f9 -> CYRILLIC SMALL LETTER SHCHA + u'\u044a' # 0x00fa -> CYRILLIC SMALL LETTER HARD SIGN + u'\u044b' # 0x00fb -> CYRILLIC SMALL LETTER YERU + u'\u044c' # 0x00fc -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044d' # 0x00fd -> CYRILLIC SMALL LETTER E + u'\u044e' # 0x00fe -> CYRILLIC SMALL LETTER YU + u'\u20ac' # 0x00ff -> EURO SIGN +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # CONTROL CHARACTER + 0x0001: 0x0001, # CONTROL CHARACTER + 0x0002: 0x0002, # CONTROL CHARACTER + 0x0003: 0x0003, # CONTROL CHARACTER + 0x0004: 0x0004, # CONTROL CHARACTER + 0x0005: 0x0005, # CONTROL CHARACTER + 0x0006: 0x0006, # CONTROL CHARACTER + 0x0007: 0x0007, # CONTROL CHARACTER + 0x0008: 0x0008, # CONTROL CHARACTER + 0x0009: 0x0009, # CONTROL CHARACTER + 0x000a: 0x000a, # CONTROL CHARACTER + 0x000b: 0x000b, # CONTROL CHARACTER + 0x000c: 0x000c, # CONTROL CHARACTER + 0x000d: 0x000d, # CONTROL CHARACTER + 0x000e: 0x000e, # 
CONTROL CHARACTER + 0x000f: 0x000f, # CONTROL CHARACTER + 0x0010: 0x0010, # CONTROL CHARACTER + 0x0011: 0x0011, # CONTROL CHARACTER + 0x0012: 0x0012, # CONTROL CHARACTER + 0x0013: 0x0013, # CONTROL CHARACTER + 0x0014: 0x0014, # CONTROL CHARACTER + 0x0015: 0x0015, # CONTROL CHARACTER + 0x0016: 0x0016, # CONTROL CHARACTER + 0x0017: 0x0017, # CONTROL CHARACTER + 0x0018: 0x0018, # CONTROL CHARACTER + 0x0019: 0x0019, # CONTROL CHARACTER + 0x001a: 0x001a, # CONTROL CHARACTER + 0x001b: 0x001b, # CONTROL CHARACTER + 0x001c: 0x001c, # CONTROL CHARACTER + 0x001d: 0x001d, # CONTROL CHARACTER + 0x001e: 0x001e, # CONTROL CHARACTER + 0x001f: 0x001f, # CONTROL CHARACTER + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN 
CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 
0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # CONTROL CHARACTER + 0x00a0: 0x00ca, # NO-BREAK SPACE + 0x00a3: 0x00a3, # POUND SIGN + 0x00a7: 0x00a4, # SECTION SIGN + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ab: 0x00c7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00c2, # NOT SIGN + 0x00ae: 0x00a8, # REGISTERED SIGN + 0x00b0: 0x00a1, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00a6, # PILCROW SIGN + 0x00bb: 0x00c8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00f7: 0x00d6, # DIVISION SIGN + 0x0192: 0x00c4, # LATIN SMALL LETTER F WITH HOOK + 0x0401: 0x00dd, # CYRILLIC CAPITAL LETTER IO + 0x0402: 0x00ab, # CYRILLIC CAPITAL LETTER DJE + 0x0403: 0x00ae, # CYRILLIC CAPITAL LETTER GJE + 0x0404: 0x00b8, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0405: 0x00c1, # CYRILLIC CAPITAL LETTER DZE + 0x0406: 0x00a7, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0407: 0x00ba, # CYRILLIC CAPITAL LETTER YI + 0x0408: 0x00b7, # CYRILLIC CAPITAL LETTER JE + 0x0409: 0x00bc, # CYRILLIC CAPITAL LETTER LJE + 0x040a: 0x00be, # CYRILLIC CAPITAL LETTER NJE + 0x040b: 0x00cb, # CYRILLIC CAPITAL LETTER TSHE + 0x040c: 0x00cd, # CYRILLIC CAPITAL LETTER KJE + 0x040e: 0x00d8, # CYRILLIC CAPITAL LETTER SHORT U + 0x040f: 0x00da, # CYRILLIC CAPITAL LETTER DZHE + 0x0410: 0x0080, # CYRILLIC CAPITAL LETTER A + 0x0411: 0x0081, # CYRILLIC CAPITAL LETTER BE + 0x0412: 0x0082, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0x0083, # CYRILLIC CAPITAL LETTER GHE + 0x0414: 0x0084, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0x0085, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0x0086, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0x0087, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0x0088, # CYRILLIC CAPITAL LETTER I + 0x0419: 0x0089, # CYRILLIC 
CAPITAL LETTER SHORT I + 0x041a: 0x008a, # CYRILLIC CAPITAL LETTER KA + 0x041b: 0x008b, # CYRILLIC CAPITAL LETTER EL + 0x041c: 0x008c, # CYRILLIC CAPITAL LETTER EM + 0x041d: 0x008d, # CYRILLIC CAPITAL LETTER EN + 0x041e: 0x008e, # CYRILLIC CAPITAL LETTER O + 0x041f: 0x008f, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0x0090, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0x0091, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0x0092, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0x0093, # CYRILLIC CAPITAL LETTER U + 0x0424: 0x0094, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0x0095, # CYRILLIC CAPITAL LETTER HA + 0x0426: 0x0096, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0x0097, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0x0098, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0x0099, # CYRILLIC CAPITAL LETTER SHCHA + 0x042a: 0x009a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042b: 0x009b, # CYRILLIC CAPITAL LETTER YERU + 0x042c: 0x009c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042d: 0x009d, # CYRILLIC CAPITAL LETTER E + 0x042e: 0x009e, # CYRILLIC CAPITAL LETTER YU + 0x042f: 0x009f, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0x00e0, # CYRILLIC SMALL LETTER A + 0x0431: 0x00e1, # CYRILLIC SMALL LETTER BE + 0x0432: 0x00e2, # CYRILLIC SMALL LETTER VE + 0x0433: 0x00e3, # CYRILLIC SMALL LETTER GHE + 0x0434: 0x00e4, # CYRILLIC SMALL LETTER DE + 0x0435: 0x00e5, # CYRILLIC SMALL LETTER IE + 0x0436: 0x00e6, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0x00e7, # CYRILLIC SMALL LETTER ZE + 0x0438: 0x00e8, # CYRILLIC SMALL LETTER I + 0x0439: 0x00e9, # CYRILLIC SMALL LETTER SHORT I + 0x043a: 0x00ea, # CYRILLIC SMALL LETTER KA + 0x043b: 0x00eb, # CYRILLIC SMALL LETTER EL + 0x043c: 0x00ec, # CYRILLIC SMALL LETTER EM + 0x043d: 0x00ed, # CYRILLIC SMALL LETTER EN + 0x043e: 0x00ee, # CYRILLIC SMALL LETTER O + 0x043f: 0x00ef, # CYRILLIC SMALL LETTER PE + 0x0440: 0x00f0, # CYRILLIC SMALL LETTER ER + 0x0441: 0x00f1, # CYRILLIC SMALL LETTER ES + 0x0442: 0x00f2, # CYRILLIC SMALL LETTER TE + 0x0443: 0x00f3, # CYRILLIC SMALL LETTER U + 0x0444: 0x00f4, # 
CYRILLIC SMALL LETTER EF + 0x0445: 0x00f5, # CYRILLIC SMALL LETTER HA + 0x0446: 0x00f6, # CYRILLIC SMALL LETTER TSE + 0x0447: 0x00f7, # CYRILLIC SMALL LETTER CHE + 0x0448: 0x00f8, # CYRILLIC SMALL LETTER SHA + 0x0449: 0x00f9, # CYRILLIC SMALL LETTER SHCHA + 0x044a: 0x00fa, # CYRILLIC SMALL LETTER HARD SIGN + 0x044b: 0x00fb, # CYRILLIC SMALL LETTER YERU + 0x044c: 0x00fc, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044d: 0x00fd, # CYRILLIC SMALL LETTER E + 0x044e: 0x00fe, # CYRILLIC SMALL LETTER YU + 0x044f: 0x00df, # CYRILLIC SMALL LETTER YA + 0x0451: 0x00de, # CYRILLIC SMALL LETTER IO + 0x0452: 0x00ac, # CYRILLIC SMALL LETTER DJE + 0x0453: 0x00af, # CYRILLIC SMALL LETTER GJE + 0x0454: 0x00b9, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0455: 0x00cf, # CYRILLIC SMALL LETTER DZE + 0x0456: 0x00b4, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0457: 0x00bb, # CYRILLIC SMALL LETTER YI + 0x0458: 0x00c0, # CYRILLIC SMALL LETTER JE + 0x0459: 0x00bd, # CYRILLIC SMALL LETTER LJE + 0x045a: 0x00bf, # CYRILLIC SMALL LETTER NJE + 0x045b: 0x00cc, # CYRILLIC SMALL LETTER TSHE + 0x045c: 0x00ce, # CYRILLIC SMALL LETTER KJE + 0x045e: 0x00d9, # CYRILLIC SMALL LETTER SHORT U + 0x045f: 0x00db, # CYRILLIC SMALL LETTER DZHE + 0x0490: 0x00a2, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN + 0x0491: 0x00b6, # CYRILLIC SMALL LETTER GHE WITH UPTURN + 0x2013: 0x00d0, # EN DASH + 0x2014: 0x00d1, # EM DASH + 0x2018: 0x00d4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x00d5, # RIGHT SINGLE QUOTATION MARK + 0x201c: 0x00d2, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x00d3, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x00d7, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x00a0, # DAGGER + 0x2022: 0x00a5, # BULLET + 0x2026: 0x00c9, # HORIZONTAL ELLIPSIS + 0x20ac: 0x00ff, # EURO SIGN + 0x2116: 0x00dc, # NUMERO SIGN + 0x2122: 0x00aa, # TRADE MARK SIGN + 0x2206: 0x00c6, # INCREMENT + 0x221a: 0x00c3, # SQUARE ROOT + 0x221e: 0x00b0, # INFINITY + 0x2248: 0x00c5, # ALMOST EQUAL TO + 0x2260: 0x00ad, # NOT EQUAL TO + 0x2264: 0x00b2, 
# LESS-THAN OR EQUAL TO + 0x2265: 0x00b3, # GREATER-THAN OR EQUAL TO +}
\ No newline at end of file diff --git a/Lib/encodings/mac_greek.py b/Lib/encodings/mac_greek.py index 473a157..c4fe120 100644 --- a/Lib/encodings/mac_greek.py +++ b/Lib/encodings/mac_greek.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'GREEK.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/APPLE/GREEK.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,134 +32,652 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x0081: 0x00b9, # SUPERSCRIPT ONE - 0x0082: 0x00b2, # SUPERSCRIPT TWO - 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0084: 0x00b3, # SUPERSCRIPT THREE - 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x0087: 0x0385, # GREEK DIALYTIKA TONOS - 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x008b: 0x0384, # GREEK TONOS - 0x008c: 0x00a8, # DIAERESIS - 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x0092: 0x00a3, # POUND SIGN - 0x0093: 0x2122, # TRADE MARK SIGN - 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x0096: 0x2022, # BULLET - 
0x0097: 0x00bd, # VULGAR FRACTION ONE HALF - 0x0098: 0x2030, # PER MILLE SIGN - 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x009b: 0x00a6, # BROKEN BAR - 0x009c: 0x00ad, # SOFT HYPHEN - 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00a0: 0x2020, # DAGGER - 0x00a1: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00a2: 0x0394, # GREEK CAPITAL LETTER DELTA - 0x00a3: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00a4: 0x039b, # GREEK CAPITAL LETTER LAMBDA - 0x00a5: 0x039e, # GREEK CAPITAL LETTER XI - 0x00a6: 0x03a0, # GREEK CAPITAL LETTER PI - 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00a8: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00ab: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x00ac: 0x00a7, # SECTION SIGN - 0x00ad: 0x2260, # NOT EQUAL TO - 0x00ae: 0x00b0, # DEGREE SIGN - 0x00af: 0x0387, # GREEK ANO TELEIA - 0x00b0: 0x0391, # GREEK CAPITAL LETTER ALPHA - 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO - 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00b4: 0x00a5, # YEN SIGN - 0x00b5: 0x0392, # GREEK CAPITAL LETTER BETA - 0x00b6: 0x0395, # GREEK CAPITAL LETTER EPSILON - 0x00b7: 0x0396, # GREEK CAPITAL LETTER ZETA - 0x00b8: 0x0397, # GREEK CAPITAL LETTER ETA - 0x00b9: 0x0399, # GREEK CAPITAL LETTER IOTA - 0x00ba: 0x039a, # GREEK CAPITAL LETTER KAPPA - 0x00bb: 0x039c, # GREEK CAPITAL LETTER MU - 0x00bc: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00bd: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x00be: 0x03a8, # GREEK CAPITAL LETTER PSI - 0x00bf: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00c0: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x00c1: 0x039d, # GREEK CAPITAL LETTER NU - 0x00c2: 0x00ac, # NOT SIGN - 0x00c3: 0x039f, # GREEK CAPITAL LETTER OMICRON - 0x00c4: 0x03a1, # GREEK CAPITAL LETTER RHO - 0x00c5: 0x2248, # ALMOST EQUAL TO - 
0x00c6: 0x03a4, # GREEK CAPITAL LETTER TAU - 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS - 0x00ca: 0x00a0, # NO-BREAK SPACE - 0x00cb: 0x03a5, # GREEK CAPITAL LETTER UPSILON - 0x00cc: 0x03a7, # GREEK CAPITAL LETTER CHI - 0x00cd: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x00ce: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE - 0x00d0: 0x2013, # EN DASH - 0x00d1: 0x2015, # HORIZONTAL BAR - 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00d6: 0x00f7, # DIVISION SIGN - 0x00d7: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x00d8: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x00d9: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x00da: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x00db: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x00dc: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS - 0x00dd: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS - 0x00de: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x00df: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x00e0: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e2: 0x03b2, # GREEK SMALL LETTER BETA - 0x00e3: 0x03c8, # GREEK SMALL LETTER PSI - 0x00e4: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00e6: 0x03c6, # GREEK SMALL LETTER PHI - 0x00e7: 0x03b3, # GREEK SMALL LETTER GAMMA - 0x00e8: 0x03b7, # GREEK SMALL LETTER ETA - 0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA - 0x00ea: 0x03be, # GREEK SMALL LETTER XI - 0x00eb: 0x03ba, # GREEK SMALL LETTER KAPPA - 0x00ec: 0x03bb, # GREEK SMALL LETTER LAMBDA - 0x00ed: 0x03bc, # GREEK SMALL LETTER MU - 0x00ee: 0x03bd, # GREEK SMALL LETTER NU - 0x00ef: 0x03bf, # 
GREEK SMALL LETTER OMICRON - 0x00f0: 0x03c0, # GREEK SMALL LETTER PI - 0x00f1: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x00f2: 0x03c1, # GREEK SMALL LETTER RHO - 0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00f4: 0x03c4, # GREEK SMALL LETTER TAU - 0x00f5: 0x03b8, # GREEK SMALL LETTER THETA - 0x00f6: 0x03c9, # GREEK SMALL LETTER OMEGA - 0x00f7: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA - 0x00f8: 0x03c7, # GREEK SMALL LETTER CHI - 0x00f9: 0x03c5, # GREEK SMALL LETTER UPSILON - 0x00fa: 0x03b6, # GREEK SMALL LETTER ZETA - 0x00fb: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x00fc: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x00fd: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x00fe: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x00ff: None, # UNDEFINED + 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x0081: 0x00b9, # SUPERSCRIPT ONE + 0x0082: 0x00b2, # SUPERSCRIPT TWO + 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0084: 0x00b3, # SUPERSCRIPT THREE + 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x0087: 0x0385, # GREEK DIALYTIKA TONOS + 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x008b: 0x0384, # GREEK TONOS + 0x008c: 0x00a8, # DIAERESIS + 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x0092: 0x00a3, # POUND SIGN + 0x0093: 0x2122, # TRADE MARK SIGN + 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x0096: 0x2022, # BULLET + 0x0097: 0x00bd, # VULGAR FRACTION ONE HALF + 0x0098: 0x2030, # PER MILLE SIGN + 0x0099: 
0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x009b: 0x00a6, # BROKEN BAR + 0x009c: 0x20ac, # EURO SIGN # before Mac OS 9.2.2, was SOFT HYPHEN + 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00a0: 0x2020, # DAGGER + 0x00a1: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00a2: 0x0394, # GREEK CAPITAL LETTER DELTA + 0x00a3: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00a4: 0x039b, # GREEK CAPITAL LETTER LAMDA + 0x00a5: 0x039e, # GREEK CAPITAL LETTER XI + 0x00a6: 0x03a0, # GREEK CAPITAL LETTER PI + 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00a8: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00ab: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x00ac: 0x00a7, # SECTION SIGN + 0x00ad: 0x2260, # NOT EQUAL TO + 0x00ae: 0x00b0, # DEGREE SIGN + 0x00af: 0x00b7, # MIDDLE DOT + 0x00b0: 0x0391, # GREEK CAPITAL LETTER ALPHA + 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO + 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00b4: 0x00a5, # YEN SIGN + 0x00b5: 0x0392, # GREEK CAPITAL LETTER BETA + 0x00b6: 0x0395, # GREEK CAPITAL LETTER EPSILON + 0x00b7: 0x0396, # GREEK CAPITAL LETTER ZETA + 0x00b8: 0x0397, # GREEK CAPITAL LETTER ETA + 0x00b9: 0x0399, # GREEK CAPITAL LETTER IOTA + 0x00ba: 0x039a, # GREEK CAPITAL LETTER KAPPA + 0x00bb: 0x039c, # GREEK CAPITAL LETTER MU + 0x00bc: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00bd: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x00be: 0x03a8, # GREEK CAPITAL LETTER PSI + 0x00bf: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00c0: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x00c1: 0x039d, # GREEK CAPITAL LETTER NU + 0x00c2: 0x00ac, # NOT SIGN + 0x00c3: 0x039f, # GREEK CAPITAL LETTER OMICRON + 0x00c4: 0x03a1, # GREEK CAPITAL LETTER RHO + 0x00c5: 0x2248, # ALMOST EQUAL TO + 0x00c6: 0x03a4, # GREEK CAPITAL LETTER TAU + 0x00c7: 0x00ab, 
# LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS + 0x00ca: 0x00a0, # NO-BREAK SPACE + 0x00cb: 0x03a5, # GREEK CAPITAL LETTER UPSILON + 0x00cc: 0x03a7, # GREEK CAPITAL LETTER CHI + 0x00cd: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x00ce: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE + 0x00d0: 0x2013, # EN DASH + 0x00d1: 0x2015, # HORIZONTAL BAR + 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00d6: 0x00f7, # DIVISION SIGN + 0x00d7: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x00d8: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x00d9: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x00da: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x00db: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x00dc: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS + 0x00dd: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS + 0x00de: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x00df: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x00e0: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e2: 0x03b2, # GREEK SMALL LETTER BETA + 0x00e3: 0x03c8, # GREEK SMALL LETTER PSI + 0x00e4: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00e6: 0x03c6, # GREEK SMALL LETTER PHI + 0x00e7: 0x03b3, # GREEK SMALL LETTER GAMMA + 0x00e8: 0x03b7, # GREEK SMALL LETTER ETA + 0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA + 0x00ea: 0x03be, # GREEK SMALL LETTER XI + 0x00eb: 0x03ba, # GREEK SMALL LETTER KAPPA + 0x00ec: 0x03bb, # GREEK SMALL LETTER LAMDA + 0x00ed: 0x03bc, # GREEK SMALL LETTER MU + 0x00ee: 0x03bd, # GREEK SMALL LETTER NU + 0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON + 0x00f0: 0x03c0, # GREEK SMALL LETTER 
PI + 0x00f1: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x00f2: 0x03c1, # GREEK SMALL LETTER RHO + 0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00f4: 0x03c4, # GREEK SMALL LETTER TAU + 0x00f5: 0x03b8, # GREEK SMALL LETTER THETA + 0x00f6: 0x03c9, # GREEK SMALL LETTER OMEGA + 0x00f7: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA + 0x00f8: 0x03c7, # GREEK SMALL LETTER CHI + 0x00f9: 0x03c5, # GREEK SMALL LETTER UPSILON + 0x00fa: 0x03b6, # GREEK SMALL LETTER ZETA + 0x00fb: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x00fc: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x00fd: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x00fe: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x00ff: 0x00ad, # SOFT HYPHEN # before Mac OS 9.2.2, was undefined }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> CONTROL CHARACTER + u'\x01' # 0x0001 -> CONTROL CHARACTER + u'\x02' # 0x0002 -> CONTROL CHARACTER + u'\x03' # 0x0003 -> CONTROL CHARACTER + u'\x04' # 0x0004 -> CONTROL CHARACTER + u'\x05' # 0x0005 -> CONTROL CHARACTER + u'\x06' # 0x0006 -> CONTROL CHARACTER + u'\x07' # 0x0007 -> CONTROL CHARACTER + u'\x08' # 0x0008 -> CONTROL CHARACTER + u'\t' # 0x0009 -> CONTROL CHARACTER + u'\n' # 0x000a -> CONTROL CHARACTER + u'\x0b' # 0x000b -> CONTROL CHARACTER + u'\x0c' # 0x000c -> CONTROL CHARACTER + u'\r' # 0x000d -> CONTROL CHARACTER + u'\x0e' # 0x000e -> CONTROL CHARACTER + u'\x0f' # 0x000f -> CONTROL CHARACTER + u'\x10' # 0x0010 -> CONTROL CHARACTER + u'\x11' # 0x0011 -> CONTROL CHARACTER + u'\x12' # 0x0012 -> CONTROL CHARACTER + u'\x13' # 0x0013 -> CONTROL CHARACTER + u'\x14' # 0x0014 -> CONTROL CHARACTER + u'\x15' # 0x0015 -> CONTROL CHARACTER + u'\x16' # 0x0016 -> CONTROL CHARACTER + u'\x17' # 0x0017 -> CONTROL CHARACTER + u'\x18' # 0x0018 -> CONTROL CHARACTER + u'\x19' # 0x0019 -> CONTROL CHARACTER + u'\x1a' # 0x001a -> CONTROL CHARACTER + u'\x1b' # 0x001b -> CONTROL CHARACTER + u'\x1c' # 0x001c -> CONTROL CHARACTER + 
u'\x1d' # 0x001d -> CONTROL CHARACTER + u'\x1e' # 0x001e -> CONTROL CHARACTER + u'\x1f' # 0x001f -> CONTROL CHARACTER + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> CONTROL CHARACTER + u'\xc4' # 0x0080 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xb9' # 0x0081 -> SUPERSCRIPT ONE + u'\xb2' # 0x0082 -> SUPERSCRIPT TWO + u'\xc9' # 0x0083 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xb3' # 0x0084 -> SUPERSCRIPT THREE + u'\xd6' # 0x0085 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x0086 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0385' # 0x0087 -> GREEK DIALYTIKA TONOS + u'\xe0' # 0x0088 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe2' # 0x0089 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x008a -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u0384' # 0x008b -> GREEK TONOS + u'\xa8' # 0x008c -> DIAERESIS + u'\xe7' # 0x008d -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x008e -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x008f -> LATIN SMALL LETTER E WITH GRAVE + u'\xea' # 0x0090 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0091 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xa3' # 0x0092 -> POUND SIGN + u'\u2122' # 0x0093 -> TRADE MARK SIGN + u'\xee' # 0x0094 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x0095 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u2022' # 0x0096 -> BULLET + u'\xbd' # 0x0097 -> VULGAR FRACTION ONE HALF + u'\u2030' # 0x0098 -> PER MILLE SIGN + u'\xf4' # 0x0099 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x009a -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xa6' # 0x009b -> BROKEN BAR + u'\u20ac' # 0x009c -> EURO SIGN # 
before Mac OS 9.2.2, was SOFT HYPHEN + u'\xf9' # 0x009d -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x009e -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x009f -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u2020' # 0x00a0 -> DAGGER + u'\u0393' # 0x00a1 -> GREEK CAPITAL LETTER GAMMA + u'\u0394' # 0x00a2 -> GREEK CAPITAL LETTER DELTA + u'\u0398' # 0x00a3 -> GREEK CAPITAL LETTER THETA + u'\u039b' # 0x00a4 -> GREEK CAPITAL LETTER LAMDA + u'\u039e' # 0x00a5 -> GREEK CAPITAL LETTER XI + u'\u03a0' # 0x00a6 -> GREEK CAPITAL LETTER PI + u'\xdf' # 0x00a7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0x00a8 -> REGISTERED SIGN + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u03a3' # 0x00aa -> GREEK CAPITAL LETTER SIGMA + u'\u03aa' # 0x00ab -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\xa7' # 0x00ac -> SECTION SIGN + u'\u2260' # 0x00ad -> NOT EQUAL TO + u'\xb0' # 0x00ae -> DEGREE SIGN + u'\xb7' # 0x00af -> MIDDLE DOT + u'\u0391' # 0x00b0 -> GREEK CAPITAL LETTER ALPHA + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\u2264' # 0x00b2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0x00b3 -> GREATER-THAN OR EQUAL TO + u'\xa5' # 0x00b4 -> YEN SIGN + u'\u0392' # 0x00b5 -> GREEK CAPITAL LETTER BETA + u'\u0395' # 0x00b6 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0x00b7 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0x00b8 -> GREEK CAPITAL LETTER ETA + u'\u0399' # 0x00b9 -> GREEK CAPITAL LETTER IOTA + u'\u039a' # 0x00ba -> GREEK CAPITAL LETTER KAPPA + u'\u039c' # 0x00bb -> GREEK CAPITAL LETTER MU + u'\u03a6' # 0x00bc -> GREEK CAPITAL LETTER PHI + u'\u03ab' # 0x00bd -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'\u03a8' # 0x00be -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0x00bf -> GREEK CAPITAL LETTER OMEGA + u'\u03ac' # 0x00c0 -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\u039d' # 0x00c1 -> GREEK CAPITAL LETTER NU + u'\xac' # 0x00c2 -> NOT SIGN + u'\u039f' # 0x00c3 -> GREEK CAPITAL LETTER OMICRON + u'\u03a1' # 0x00c4 -> GREEK CAPITAL LETTER RHO + u'\u2248' # 0x00c5 -> ALMOST EQUAL TO + u'\u03a4' # 
0x00c6 -> GREEK CAPITAL LETTER TAU + u'\xab' # 0x00c7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00c8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0x00c9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0x00ca -> NO-BREAK SPACE + u'\u03a5' # 0x00cb -> GREEK CAPITAL LETTER UPSILON + u'\u03a7' # 0x00cc -> GREEK CAPITAL LETTER CHI + u'\u0386' # 0x00cd -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\u0388' # 0x00ce -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u0153' # 0x00cf -> LATIN SMALL LIGATURE OE + u'\u2013' # 0x00d0 -> EN DASH + u'\u2015' # 0x00d1 -> HORIZONTAL BAR + u'\u201c' # 0x00d2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x00d3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0x00d4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x00d5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0x00d6 -> DIVISION SIGN + u'\u0389' # 0x00d7 -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\u038a' # 0x00d8 -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\u038c' # 0x00d9 -> GREEK CAPITAL LETTER OMICRON WITH TONOS + u'\u038e' # 0x00da -> GREEK CAPITAL LETTER UPSILON WITH TONOS + u'\u03ad' # 0x00db -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0x00dc -> GREEK SMALL LETTER ETA WITH TONOS + u'\u03af' # 0x00dd -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03cc' # 0x00de -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u038f' # 0x00df -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'\u03cd' # 0x00e0 -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u03b1' # 0x00e1 -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 0x00e2 -> GREEK SMALL LETTER BETA + u'\u03c8' # 0x00e3 -> GREEK SMALL LETTER PSI + u'\u03b4' # 0x00e4 -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0x00e5 -> GREEK SMALL LETTER EPSILON + u'\u03c6' # 0x00e6 -> GREEK SMALL LETTER PHI + u'\u03b3' # 0x00e7 -> GREEK SMALL LETTER GAMMA + u'\u03b7' # 0x00e8 -> GREEK SMALL LETTER ETA + u'\u03b9' # 0x00e9 -> GREEK SMALL LETTER IOTA + u'\u03be' # 0x00ea -> GREEK SMALL LETTER XI + u'\u03ba' # 0x00eb -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0x00ec 
-> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0x00ed -> GREEK SMALL LETTER MU + u'\u03bd' # 0x00ee -> GREEK SMALL LETTER NU + u'\u03bf' # 0x00ef -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0x00f0 -> GREEK SMALL LETTER PI + u'\u03ce' # 0x00f1 -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\u03c1' # 0x00f2 -> GREEK SMALL LETTER RHO + u'\u03c3' # 0x00f3 -> GREEK SMALL LETTER SIGMA + u'\u03c4' # 0x00f4 -> GREEK SMALL LETTER TAU + u'\u03b8' # 0x00f5 -> GREEK SMALL LETTER THETA + u'\u03c9' # 0x00f6 -> GREEK SMALL LETTER OMEGA + u'\u03c2' # 0x00f7 -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c7' # 0x00f8 -> GREEK SMALL LETTER CHI + u'\u03c5' # 0x00f9 -> GREEK SMALL LETTER UPSILON + u'\u03b6' # 0x00fa -> GREEK SMALL LETTER ZETA + u'\u03ca' # 0x00fb -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u03cb' # 0x00fc -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u0390' # 0x00fd -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + u'\u03b0' # 0x00fe -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + u'\xad' # 0x00ff -> SOFT HYPHEN # before Mac OS 9.2.2, was undefined +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # CONTROL CHARACTER + 0x0001: 0x0001, # CONTROL CHARACTER + 0x0002: 0x0002, # CONTROL CHARACTER + 0x0003: 0x0003, # CONTROL CHARACTER + 0x0004: 0x0004, # CONTROL CHARACTER + 0x0005: 0x0005, # CONTROL CHARACTER + 0x0006: 0x0006, # CONTROL CHARACTER + 0x0007: 0x0007, # CONTROL CHARACTER + 0x0008: 0x0008, # CONTROL CHARACTER + 0x0009: 0x0009, # CONTROL CHARACTER + 0x000a: 0x000a, # CONTROL CHARACTER + 0x000b: 0x000b, # CONTROL CHARACTER + 0x000c: 0x000c, # CONTROL CHARACTER + 0x000d: 0x000d, # CONTROL CHARACTER + 0x000e: 0x000e, # CONTROL CHARACTER + 0x000f: 0x000f, # CONTROL CHARACTER + 0x0010: 0x0010, # CONTROL CHARACTER + 0x0011: 0x0011, # CONTROL CHARACTER + 0x0012: 0x0012, # CONTROL CHARACTER + 0x0013: 0x0013, # CONTROL CHARACTER + 0x0014: 0x0014, # CONTROL CHARACTER + 0x0015: 0x0015, # CONTROL CHARACTER 
+ 0x0016: 0x0016, # CONTROL CHARACTER + 0x0017: 0x0017, # CONTROL CHARACTER + 0x0018: 0x0018, # CONTROL CHARACTER + 0x0019: 0x0019, # CONTROL CHARACTER + 0x001a: 0x001a, # CONTROL CHARACTER + 0x001b: 0x001b, # CONTROL CHARACTER + 0x001c: 0x001c, # CONTROL CHARACTER + 0x001d: 0x001d, # CONTROL CHARACTER + 0x001e: 0x001e, # CONTROL CHARACTER + 0x001f: 0x001f, # CONTROL CHARACTER + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 
0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # CONTROL 
CHARACTER + 0x00a0: 0x00ca, # NO-BREAK SPACE + 0x00a3: 0x0092, # POUND SIGN + 0x00a5: 0x00b4, # YEN SIGN + 0x00a6: 0x009b, # BROKEN BAR + 0x00a7: 0x00ac, # SECTION SIGN + 0x00a8: 0x008c, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ab: 0x00c7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00c2, # NOT SIGN + 0x00ad: 0x00ff, # SOFT HYPHEN # before Mac OS 9.2.2, was undefined + 0x00ae: 0x00a8, # REGISTERED SIGN + 0x00b0: 0x00ae, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x0082, # SUPERSCRIPT TWO + 0x00b3: 0x0084, # SUPERSCRIPT THREE + 0x00b7: 0x00af, # MIDDLE DOT + 0x00b9: 0x0081, # SUPERSCRIPT ONE + 0x00bb: 0x00c8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bd: 0x0097, # VULGAR FRACTION ONE HALF + 0x00c4: 0x0080, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c9: 0x0083, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00d6: 0x0085, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00dc: 0x0086, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00a7, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0088, # LATIN SMALL LETTER A WITH GRAVE + 0x00e2: 0x0089, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x008a, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e7: 0x008d, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008f, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x008e, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0090, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0091, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ee: 0x0094, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x0095, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f4: 0x0099, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x009a, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00d6, # DIVISION SIGN + 0x00f9: 0x009d, # LATIN SMALL LETTER U WITH GRAVE + 0x00fb: 0x009e, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x009f, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0153: 0x00cf, # LATIN SMALL LIGATURE OE + 0x0384: 0x008b, # GREEK TONOS + 0x0385: 0x0087, # GREEK DIALYTIKA 
TONOS + 0x0386: 0x00cd, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0388: 0x00ce, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x0389: 0x00d7, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x038a: 0x00d8, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x038c: 0x00d9, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x038e: 0x00da, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x038f: 0x00df, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0390: 0x00fd, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x0391: 0x00b0, # GREEK CAPITAL LETTER ALPHA + 0x0392: 0x00b5, # GREEK CAPITAL LETTER BETA + 0x0393: 0x00a1, # GREEK CAPITAL LETTER GAMMA + 0x0394: 0x00a2, # GREEK CAPITAL LETTER DELTA + 0x0395: 0x00b6, # GREEK CAPITAL LETTER EPSILON + 0x0396: 0x00b7, # GREEK CAPITAL LETTER ZETA + 0x0397: 0x00b8, # GREEK CAPITAL LETTER ETA + 0x0398: 0x00a3, # GREEK CAPITAL LETTER THETA + 0x0399: 0x00b9, # GREEK CAPITAL LETTER IOTA + 0x039a: 0x00ba, # GREEK CAPITAL LETTER KAPPA + 0x039b: 0x00a4, # GREEK CAPITAL LETTER LAMDA + 0x039c: 0x00bb, # GREEK CAPITAL LETTER MU + 0x039d: 0x00c1, # GREEK CAPITAL LETTER NU + 0x039e: 0x00a5, # GREEK CAPITAL LETTER XI + 0x039f: 0x00c3, # GREEK CAPITAL LETTER OMICRON + 0x03a0: 0x00a6, # GREEK CAPITAL LETTER PI + 0x03a1: 0x00c4, # GREEK CAPITAL LETTER RHO + 0x03a3: 0x00aa, # GREEK CAPITAL LETTER SIGMA + 0x03a4: 0x00c6, # GREEK CAPITAL LETTER TAU + 0x03a5: 0x00cb, # GREEK CAPITAL LETTER UPSILON + 0x03a6: 0x00bc, # GREEK CAPITAL LETTER PHI + 0x03a7: 0x00cc, # GREEK CAPITAL LETTER CHI + 0x03a8: 0x00be, # GREEK CAPITAL LETTER PSI + 0x03a9: 0x00bf, # GREEK CAPITAL LETTER OMEGA + 0x03aa: 0x00ab, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x03ab: 0x00bd, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x03ac: 0x00c0, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x03ad: 0x00db, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x03ae: 0x00dc, # GREEK SMALL LETTER ETA WITH TONOS + 0x03af: 0x00dd, # GREEK SMALL LETTER IOTA WITH TONOS + 0x03b0: 0x00fe, # GREEK SMALL LETTER UPSILON WITH 
DIALYTIKA AND TONOS + 0x03b1: 0x00e1, # GREEK SMALL LETTER ALPHA + 0x03b2: 0x00e2, # GREEK SMALL LETTER BETA + 0x03b3: 0x00e7, # GREEK SMALL LETTER GAMMA + 0x03b4: 0x00e4, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00e5, # GREEK SMALL LETTER EPSILON + 0x03b6: 0x00fa, # GREEK SMALL LETTER ZETA + 0x03b7: 0x00e8, # GREEK SMALL LETTER ETA + 0x03b8: 0x00f5, # GREEK SMALL LETTER THETA + 0x03b9: 0x00e9, # GREEK SMALL LETTER IOTA + 0x03ba: 0x00eb, # GREEK SMALL LETTER KAPPA + 0x03bb: 0x00ec, # GREEK SMALL LETTER LAMDA + 0x03bc: 0x00ed, # GREEK SMALL LETTER MU + 0x03bd: 0x00ee, # GREEK SMALL LETTER NU + 0x03be: 0x00ea, # GREEK SMALL LETTER XI + 0x03bf: 0x00ef, # GREEK SMALL LETTER OMICRON + 0x03c0: 0x00f0, # GREEK SMALL LETTER PI + 0x03c1: 0x00f2, # GREEK SMALL LETTER RHO + 0x03c2: 0x00f7, # GREEK SMALL LETTER FINAL SIGMA + 0x03c3: 0x00f3, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00f4, # GREEK SMALL LETTER TAU + 0x03c5: 0x00f9, # GREEK SMALL LETTER UPSILON + 0x03c6: 0x00e6, # GREEK SMALL LETTER PHI + 0x03c7: 0x00f8, # GREEK SMALL LETTER CHI + 0x03c8: 0x00e3, # GREEK SMALL LETTER PSI + 0x03c9: 0x00f6, # GREEK SMALL LETTER OMEGA + 0x03ca: 0x00fb, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x03cb: 0x00fc, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03cc: 0x00de, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03cd: 0x00e0, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03ce: 0x00f1, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x2013: 0x00d0, # EN DASH + 0x2015: 0x00d1, # HORIZONTAL BAR + 0x2018: 0x00d4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x00d5, # RIGHT SINGLE QUOTATION MARK + 0x201c: 0x00d2, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x00d3, # RIGHT DOUBLE QUOTATION MARK + 0x2020: 0x00a0, # DAGGER + 0x2022: 0x0096, # BULLET + 0x2026: 0x00c9, # HORIZONTAL ELLIPSIS + 0x2030: 0x0098, # PER MILLE SIGN + 0x20ac: 0x009c, # EURO SIGN # before Mac OS 9.2.2, was SOFT HYPHEN + 0x2122: 0x0093, # TRADE MARK SIGN + 0x2248: 0x00c5, # ALMOST EQUAL TO + 0x2260: 0x00ad, # NOT EQUAL TO + 0x2264: 0x00b2, # 
LESS-THAN OR EQUAL TO + 0x2265: 0x00b3, # GREATER-THAN OR EQUAL TO +}
\ No newline at end of file diff --git a/Lib/encodings/mac_iceland.py b/Lib/encodings/mac_iceland.py index 00bddf9..aa49238 100644 --- a/Lib/encodings/mac_iceland.py +++ b/Lib/encodings/mac_iceland.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'ICELAND.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/APPLE/ICELAND.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,130 +32,648 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x0093: 0x00ec, # LATIN 
SMALL LETTER I WITH GRAVE - 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00a0: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00a1: 0x00b0, # DEGREE SIGN - 0x00a4: 0x00a7, # SECTION SIGN - 0x00a5: 0x2022, # BULLET - 0x00a6: 0x00b6, # PILCROW SIGN - 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00a8: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x2122, # TRADE MARK SIGN - 0x00ab: 0x00b4, # ACUTE ACCENT - 0x00ac: 0x00a8, # DIAERESIS - 0x00ad: 0x2260, # NOT EQUAL TO - 0x00ae: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00b0: 0x221e, # INFINITY - 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO - 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00b4: 0x00a5, # YEN SIGN - 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL - 0x00b7: 0x2211, # N-ARY SUMMATION - 0x00b8: 0x220f, # N-ARY PRODUCT - 0x00b9: 0x03c0, # GREEK SMALL LETTER PI - 0x00ba: 0x222b, # INTEGRAL - 0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00bd: 0x2126, # OHM SIGN - 0x00be: 0x00e6, # LATIN SMALL LIGATURE AE - 0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x00c0: 0x00bf, # INVERTED QUESTION MARK - 0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00c2: 0x00ac, # NOT SIGN - 0x00c3: 0x221a, # SQUARE ROOT - 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00c5: 0x2248, # ALMOST EQUAL TO - 0x00c6: 0x2206, # INCREMENT - 0x00c7: 
0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS - 0x00ca: 0x00a0, # NO-BREAK SPACE - 0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00ce: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE - 0x00d0: 0x2013, # EN DASH - 0x00d1: 0x2014, # EM DASH - 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00d6: 0x00f7, # DIVISION SIGN - 0x00d7: 0x25ca, # LOZENGE - 0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x00da: 0x2044, # FRACTION SLASH - 0x00db: 0x00a4, # CURRENCY SIGN - 0x00dc: 0x00d0, # LATIN CAPITAL LETTER ETH - 0x00dd: 0x00f0, # LATIN SMALL LETTER ETH - 0x00df: 0x00fe, # LATIN SMALL LETTER THORN - 0x00e0: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00e1: 0x00b7, # MIDDLE DOT - 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00e4: 0x2030, # PER MILLE SIGN - 0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00e8: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00f0: None, # UNDEFINED - 0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00f2: 
0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00f5: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x00f7: 0x02dc, # SMALL TILDE - 0x00f8: 0x00af, # MACRON - 0x00f9: 0x02d8, # BREVE - 0x00fa: 0x02d9, # DOT ABOVE - 0x00fb: 0x02da, # RING ABOVE - 0x00fc: 0x00b8, # CEDILLA - 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT - 0x00fe: 0x02db, # OGONEK - 0x00ff: 0x02c7, # CARON + 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x0093: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH 
DIAERESIS + 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00a0: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00a1: 0x00b0, # DEGREE SIGN + 0x00a4: 0x00a7, # SECTION SIGN + 0x00a5: 0x2022, # BULLET + 0x00a6: 0x00b6, # PILCROW SIGN + 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00a8: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x2122, # TRADE MARK SIGN + 0x00ab: 0x00b4, # ACUTE ACCENT + 0x00ac: 0x00a8, # DIAERESIS + 0x00ad: 0x2260, # NOT EQUAL TO + 0x00ae: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00b0: 0x221e, # INFINITY + 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO + 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00b4: 0x00a5, # YEN SIGN + 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL + 0x00b7: 0x2211, # N-ARY SUMMATION + 0x00b8: 0x220f, # N-ARY PRODUCT + 0x00b9: 0x03c0, # GREEK SMALL LETTER PI + 0x00ba: 0x222b, # INTEGRAL + 0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00bd: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00be: 0x00e6, # LATIN SMALL LETTER AE + 0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00c0: 0x00bf, # INVERTED QUESTION MARK + 0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00c2: 0x00ac, # NOT SIGN + 0x00c3: 0x221a, # SQUARE ROOT + 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00c5: 0x2248, # ALMOST EQUAL TO + 0x00c6: 0x2206, # INCREMENT + 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS + 0x00ca: 0x00a0, # NO-BREAK SPACE + 0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00ce: 0x0152, # LATIN CAPITAL 
LIGATURE OE + 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE + 0x00d0: 0x2013, # EN DASH + 0x00d1: 0x2014, # EM DASH + 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00d6: 0x00f7, # DIVISION SIGN + 0x00d7: 0x25ca, # LOZENGE + 0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x00da: 0x2044, # FRACTION SLASH + 0x00db: 0x20ac, # EURO SIGN + 0x00dc: 0x00d0, # LATIN CAPITAL LETTER ETH + 0x00dd: 0x00f0, # LATIN SMALL LETTER ETH + 0x00df: 0x00fe, # LATIN SMALL LETTER THORN + 0x00e0: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00e1: 0x00b7, # MIDDLE DOT + 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x00e4: 0x2030, # PER MILLE SIGN + 0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00e8: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00f0: 0xf8ff, # Apple logo + 0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00f5: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x00f7: 0x02dc, # SMALL TILDE + 0x00f8: 0x00af, # MACRON + 0x00f9: 0x02d8, # BREVE + 0x00fa: 0x02d9, # DOT ABOVE + 0x00fb: 0x02da, 
# RING ABOVE + 0x00fc: 0x00b8, # CEDILLA + 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT + 0x00fe: 0x02db, # OGONEK + 0x00ff: 0x02c7, # CARON }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> CONTROL CHARACTER + u'\x01' # 0x0001 -> CONTROL CHARACTER + u'\x02' # 0x0002 -> CONTROL CHARACTER + u'\x03' # 0x0003 -> CONTROL CHARACTER + u'\x04' # 0x0004 -> CONTROL CHARACTER + u'\x05' # 0x0005 -> CONTROL CHARACTER + u'\x06' # 0x0006 -> CONTROL CHARACTER + u'\x07' # 0x0007 -> CONTROL CHARACTER + u'\x08' # 0x0008 -> CONTROL CHARACTER + u'\t' # 0x0009 -> CONTROL CHARACTER + u'\n' # 0x000a -> CONTROL CHARACTER + u'\x0b' # 0x000b -> CONTROL CHARACTER + u'\x0c' # 0x000c -> CONTROL CHARACTER + u'\r' # 0x000d -> CONTROL CHARACTER + u'\x0e' # 0x000e -> CONTROL CHARACTER + u'\x0f' # 0x000f -> CONTROL CHARACTER + u'\x10' # 0x0010 -> CONTROL CHARACTER + u'\x11' # 0x0011 -> CONTROL CHARACTER + u'\x12' # 0x0012 -> CONTROL CHARACTER + u'\x13' # 0x0013 -> CONTROL CHARACTER + u'\x14' # 0x0014 -> CONTROL CHARACTER + u'\x15' # 0x0015 -> CONTROL CHARACTER + u'\x16' # 0x0016 -> CONTROL CHARACTER + u'\x17' # 0x0017 -> CONTROL CHARACTER + u'\x18' # 0x0018 -> CONTROL CHARACTER + u'\x19' # 0x0019 -> CONTROL CHARACTER + u'\x1a' # 0x001a -> CONTROL CHARACTER + u'\x1b' # 0x001b -> CONTROL CHARACTER + u'\x1c' # 0x001c -> CONTROL CHARACTER + u'\x1d' # 0x001d -> CONTROL CHARACTER + u'\x1e' # 0x001e -> CONTROL CHARACTER + u'\x1f' # 0x001f -> CONTROL CHARACTER + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> CONTROL CHARACTER + u'\xc4' # 0x0080 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x0081 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc7' # 0x0082 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc9' # 0x0083 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xd1' # 0x0084 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd6' # 0x0085 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x0086 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xe1' # 0x0087 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe0' # 0x0088 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe2' # 0x0089 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x008a -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe3' # 0x008b -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x008c -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x008d -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x008e -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x008f -> LATIN SMALL LETTER E WITH 
GRAVE + u'\xea' # 0x0090 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0091 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xed' # 0x0092 -> LATIN SMALL LETTER I WITH ACUTE + u'\xec' # 0x0093 -> LATIN SMALL LETTER I WITH GRAVE + u'\xee' # 0x0094 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x0095 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf1' # 0x0096 -> LATIN SMALL LETTER N WITH TILDE + u'\xf3' # 0x0097 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf2' # 0x0098 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf4' # 0x0099 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x009a -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf5' # 0x009b -> LATIN SMALL LETTER O WITH TILDE + u'\xfa' # 0x009c -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x009d -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x009e -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x009f -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xdd' # 0x00a0 -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xb0' # 0x00a1 -> DEGREE SIGN + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa7' # 0x00a4 -> SECTION SIGN + u'\u2022' # 0x00a5 -> BULLET + u'\xb6' # 0x00a6 -> PILCROW SIGN + u'\xdf' # 0x00a7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0x00a8 -> REGISTERED SIGN + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u2122' # 0x00aa -> TRADE MARK SIGN + u'\xb4' # 0x00ab -> ACUTE ACCENT + u'\xa8' # 0x00ac -> DIAERESIS + u'\u2260' # 0x00ad -> NOT EQUAL TO + u'\xc6' # 0x00ae -> LATIN CAPITAL LETTER AE + u'\xd8' # 0x00af -> LATIN CAPITAL LETTER O WITH STROKE + u'\u221e' # 0x00b0 -> INFINITY + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\u2264' # 0x00b2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0x00b3 -> GREATER-THAN OR EQUAL TO + u'\xa5' # 0x00b4 -> YEN SIGN + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\u2202' # 0x00b6 -> PARTIAL DIFFERENTIAL + u'\u2211' # 0x00b7 -> N-ARY SUMMATION + u'\u220f' # 0x00b8 -> N-ARY PRODUCT + u'\u03c0' # 0x00b9 -> GREEK SMALL LETTER PI + u'\u222b' # 0x00ba -> INTEGRAL + u'\xaa' # 0x00bb -> FEMININE 
ORDINAL INDICATOR + u'\xba' # 0x00bc -> MASCULINE ORDINAL INDICATOR + u'\u03a9' # 0x00bd -> GREEK CAPITAL LETTER OMEGA + u'\xe6' # 0x00be -> LATIN SMALL LETTER AE + u'\xf8' # 0x00bf -> LATIN SMALL LETTER O WITH STROKE + u'\xbf' # 0x00c0 -> INVERTED QUESTION MARK + u'\xa1' # 0x00c1 -> INVERTED EXCLAMATION MARK + u'\xac' # 0x00c2 -> NOT SIGN + u'\u221a' # 0x00c3 -> SQUARE ROOT + u'\u0192' # 0x00c4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0x00c5 -> ALMOST EQUAL TO + u'\u2206' # 0x00c6 -> INCREMENT + u'\xab' # 0x00c7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00c8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0x00c9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0x00ca -> NO-BREAK SPACE + u'\xc0' # 0x00cb -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc3' # 0x00cc -> LATIN CAPITAL LETTER A WITH TILDE + u'\xd5' # 0x00cd -> LATIN CAPITAL LETTER O WITH TILDE + u'\u0152' # 0x00ce -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0x00cf -> LATIN SMALL LIGATURE OE + u'\u2013' # 0x00d0 -> EN DASH + u'\u2014' # 0x00d1 -> EM DASH + u'\u201c' # 0x00d2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x00d3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0x00d4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x00d5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0x00d6 -> DIVISION SIGN + u'\u25ca' # 0x00d7 -> LOZENGE + u'\xff' # 0x00d8 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\u0178' # 0x00d9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u2044' # 0x00da -> FRACTION SLASH + u'\u20ac' # 0x00db -> EURO SIGN + u'\xd0' # 0x00dc -> LATIN CAPITAL LETTER ETH + u'\xf0' # 0x00dd -> LATIN SMALL LETTER ETH + u'\xde' # 0x00de -> LATIN CAPITAL LETTER THORN + u'\xfe' # 0x00df -> LATIN SMALL LETTER THORN + u'\xfd' # 0x00e0 -> LATIN SMALL LETTER Y WITH ACUTE + u'\xb7' # 0x00e1 -> MIDDLE DOT + u'\u201a' # 0x00e2 -> SINGLE LOW-9 QUOTATION MARK + u'\u201e' # 0x00e3 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2030' # 0x00e4 -> PER MILLE SIGN + u'\xc2' # 0x00e5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 
u'\xca' # 0x00e6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xc1' # 0x00e7 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xcb' # 0x00e8 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0x00e9 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0x00ea -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00eb -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00ec -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0x00ed -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd3' # 0x00ee -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00ef -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\uf8ff' # 0x00f0 -> Apple logo + u'\xd2' # 0x00f1 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xda' # 0x00f2 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00f3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0x00f4 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\u0131' # 0x00f5 -> LATIN SMALL LETTER DOTLESS I + u'\u02c6' # 0x00f6 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u02dc' # 0x00f7 -> SMALL TILDE + u'\xaf' # 0x00f8 -> MACRON + u'\u02d8' # 0x00f9 -> BREVE + u'\u02d9' # 0x00fa -> DOT ABOVE + u'\u02da' # 0x00fb -> RING ABOVE + u'\xb8' # 0x00fc -> CEDILLA + u'\u02dd' # 0x00fd -> DOUBLE ACUTE ACCENT + u'\u02db' # 0x00fe -> OGONEK + u'\u02c7' # 0x00ff -> CARON +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # CONTROL CHARACTER + 0x0001: 0x0001, # CONTROL CHARACTER + 0x0002: 0x0002, # CONTROL CHARACTER + 0x0003: 0x0003, # CONTROL CHARACTER + 0x0004: 0x0004, # CONTROL CHARACTER + 0x0005: 0x0005, # CONTROL CHARACTER + 0x0006: 0x0006, # CONTROL CHARACTER + 0x0007: 0x0007, # CONTROL CHARACTER + 0x0008: 0x0008, # CONTROL CHARACTER + 0x0009: 0x0009, # CONTROL CHARACTER + 0x000a: 0x000a, # CONTROL CHARACTER + 0x000b: 0x000b, # CONTROL CHARACTER + 0x000c: 0x000c, # CONTROL CHARACTER + 0x000d: 0x000d, # CONTROL CHARACTER + 0x000e: 0x000e, # CONTROL CHARACTER + 0x000f: 0x000f, # CONTROL CHARACTER + 0x0010: 0x0010, # CONTROL CHARACTER + 
0x0011: 0x0011, # CONTROL CHARACTER + 0x0012: 0x0012, # CONTROL CHARACTER + 0x0013: 0x0013, # CONTROL CHARACTER + 0x0014: 0x0014, # CONTROL CHARACTER + 0x0015: 0x0015, # CONTROL CHARACTER + 0x0016: 0x0016, # CONTROL CHARACTER + 0x0017: 0x0017, # CONTROL CHARACTER + 0x0018: 0x0018, # CONTROL CHARACTER + 0x0019: 0x0019, # CONTROL CHARACTER + 0x001a: 0x001a, # CONTROL CHARACTER + 0x001b: 0x001b, # CONTROL CHARACTER + 0x001c: 0x001c, # CONTROL CHARACTER + 0x001d: 0x001d, # CONTROL CHARACTER + 0x001e: 0x001e, # CONTROL CHARACTER + 0x001f: 0x001f, # CONTROL CHARACTER + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER 
I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # 
LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # CONTROL CHARACTER + 0x00a0: 0x00ca, # NO-BREAK SPACE + 0x00a1: 0x00c1, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a5: 0x00b4, # YEN SIGN + 0x00a7: 0x00a4, # SECTION SIGN + 0x00a8: 0x00ac, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00aa: 0x00bb, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00c7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00c2, # NOT SIGN + 0x00ae: 0x00a8, # REGISTERED SIGN + 0x00af: 0x00f8, # MACRON + 0x00b0: 0x00a1, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b4: 0x00ab, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00a6, # PILCROW SIGN + 0x00b7: 0x00e1, # MIDDLE DOT + 0x00b8: 0x00fc, # CEDILLA + 0x00ba: 0x00bc, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00c8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bf: 0x00c0, # INVERTED QUESTION MARK + 0x00c0: 0x00cb, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00e7, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00e5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00cc, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x0080, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x0081, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00ae, # LATIN CAPITAL LETTER AE + 0x00c7: 0x0082, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00e9, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x0083, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00e6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00e8, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00ed, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00ea, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00eb, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00ec, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d0: 0x00dc, # LATIN CAPITAL LETTER ETH + 0x00d1: 0x0084, # 
LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00f1, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00ee, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00ef, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00cd, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x0085, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d8: 0x00af, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00f4, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00f2, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00f3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x0086, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x00a0, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00de: 0x00de, # LATIN CAPITAL LETTER THORN + 0x00df: 0x00a7, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0088, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x0087, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0089, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x008b, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x008a, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x008c, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00be, # LATIN SMALL LETTER AE + 0x00e7: 0x008d, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008f, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x008e, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0090, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0091, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x0093, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x0092, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x0094, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x0095, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f0: 0x00dd, # LATIN SMALL LETTER ETH + 0x00f1: 0x0096, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0098, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x0097, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0099, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x009b, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x009a, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00d6, # DIVISION SIGN + 
0x00f8: 0x00bf, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x009d, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x009c, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x009e, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x009f, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00e0, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0x00df, # LATIN SMALL LETTER THORN + 0x00ff: 0x00d8, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0131: 0x00f5, # LATIN SMALL LETTER DOTLESS I + 0x0152: 0x00ce, # LATIN CAPITAL LIGATURE OE + 0x0153: 0x00cf, # LATIN SMALL LIGATURE OE + 0x0178: 0x00d9, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0192: 0x00c4, # LATIN SMALL LETTER F WITH HOOK + 0x02c6: 0x00f6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02c7: 0x00ff, # CARON + 0x02d8: 0x00f9, # BREVE + 0x02d9: 0x00fa, # DOT ABOVE + 0x02da: 0x00fb, # RING ABOVE + 0x02db: 0x00fe, # OGONEK + 0x02dc: 0x00f7, # SMALL TILDE + 0x02dd: 0x00fd, # DOUBLE ACUTE ACCENT + 0x03a9: 0x00bd, # GREEK CAPITAL LETTER OMEGA + 0x03c0: 0x00b9, # GREEK SMALL LETTER PI + 0x2013: 0x00d0, # EN DASH + 0x2014: 0x00d1, # EM DASH + 0x2018: 0x00d4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x00d5, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x00e2, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x00d2, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x00d3, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x00e3, # DOUBLE LOW-9 QUOTATION MARK + 0x2022: 0x00a5, # BULLET + 0x2026: 0x00c9, # HORIZONTAL ELLIPSIS + 0x2030: 0x00e4, # PER MILLE SIGN + 0x2044: 0x00da, # FRACTION SLASH + 0x20ac: 0x00db, # EURO SIGN + 0x2122: 0x00aa, # TRADE MARK SIGN + 0x2202: 0x00b6, # PARTIAL DIFFERENTIAL + 0x2206: 0x00c6, # INCREMENT + 0x220f: 0x00b8, # N-ARY PRODUCT + 0x2211: 0x00b7, # N-ARY SUMMATION + 0x221a: 0x00c3, # SQUARE ROOT + 0x221e: 0x00b0, # INFINITY + 0x222b: 0x00ba, # INTEGRAL + 0x2248: 0x00c5, # ALMOST EQUAL TO + 0x2260: 0x00ad, # NOT EQUAL TO + 0x2264: 0x00b2, # LESS-THAN OR EQUAL TO + 0x2265: 0x00b3, # GREATER-THAN OR EQUAL TO + 0x25ca: 0x00d7, # LOZENGE + 0xf8ff: 
0x00f0, # Apple logo +}
\ No newline at end of file diff --git a/Lib/encodings/mac_roman.py b/Lib/encodings/mac_roman.py index 3f02ab1..ec9b2c9 100644 --- a/Lib/encodings/mac_roman.py +++ b/Lib/encodings/mac_roman.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'ROMAN.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/APPLE/ROMAN.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,131 +32,649 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x0093: 0x00ec, # LATIN SMALL LETTER I 
WITH GRAVE - 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00a0: 0x2020, # DAGGER - 0x00a1: 0x00b0, # DEGREE SIGN - 0x00a4: 0x00a7, # SECTION SIGN - 0x00a5: 0x2022, # BULLET - 0x00a6: 0x00b6, # PILCROW SIGN - 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00a8: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x2122, # TRADE MARK SIGN - 0x00ab: 0x00b4, # ACUTE ACCENT - 0x00ac: 0x00a8, # DIAERESIS - 0x00ad: 0x2260, # NOT EQUAL TO - 0x00ae: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00b0: 0x221e, # INFINITY - 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO - 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00b4: 0x00a5, # YEN SIGN - 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL - 0x00b7: 0x2211, # N-ARY SUMMATION - 0x00b8: 0x220f, # N-ARY PRODUCT - 0x00b9: 0x03c0, # GREEK SMALL LETTER PI - 0x00ba: 0x222b, # INTEGRAL - 0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00bd: 0x2126, # OHM SIGN - 0x00be: 0x00e6, # LATIN SMALL LIGATURE AE - 0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x00c0: 0x00bf, # INVERTED QUESTION MARK - 0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00c2: 0x00ac, # NOT SIGN - 0x00c3: 0x221a, # SQUARE ROOT - 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00c5: 0x2248, # ALMOST EQUAL TO - 0x00c6: 0x2206, # INCREMENT - 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE 
QUOTATION MARK - 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS - 0x00ca: 0x00a0, # NO-BREAK SPACE - 0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00ce: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE - 0x00d0: 0x2013, # EN DASH - 0x00d1: 0x2014, # EM DASH - 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00d6: 0x00f7, # DIVISION SIGN - 0x00d7: 0x25ca, # LOZENGE - 0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x00da: 0x2044, # FRACTION SLASH - 0x00db: 0x00a4, # CURRENCY SIGN - 0x00dc: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x00dd: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x00de: 0xfb01, # LATIN SMALL LIGATURE FI - 0x00df: 0xfb02, # LATIN SMALL LIGATURE FL - 0x00e0: 0x2021, # DOUBLE DAGGER - 0x00e1: 0x00b7, # MIDDLE DOT - 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00e4: 0x2030, # PER MILLE SIGN - 0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00e8: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00f0: None, # UNDEFINED - 0x00f1: 0x00d2, # LATIN CAPITAL LETTER O 
WITH GRAVE - 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00f5: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x00f7: 0x02dc, # SMALL TILDE - 0x00f8: 0x00af, # MACRON - 0x00f9: 0x02d8, # BREVE - 0x00fa: 0x02d9, # DOT ABOVE - 0x00fb: 0x02da, # RING ABOVE - 0x00fc: 0x00b8, # CEDILLA - 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT - 0x00fe: 0x02db, # OGONEK - 0x00ff: 0x02c7, # CARON + 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x0093: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x009a: 0x00f6, # LATIN 
SMALL LETTER O WITH DIAERESIS + 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00a0: 0x2020, # DAGGER + 0x00a1: 0x00b0, # DEGREE SIGN + 0x00a4: 0x00a7, # SECTION SIGN + 0x00a5: 0x2022, # BULLET + 0x00a6: 0x00b6, # PILCROW SIGN + 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00a8: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x2122, # TRADE MARK SIGN + 0x00ab: 0x00b4, # ACUTE ACCENT + 0x00ac: 0x00a8, # DIAERESIS + 0x00ad: 0x2260, # NOT EQUAL TO + 0x00ae: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00b0: 0x221e, # INFINITY + 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO + 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00b4: 0x00a5, # YEN SIGN + 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL + 0x00b7: 0x2211, # N-ARY SUMMATION + 0x00b8: 0x220f, # N-ARY PRODUCT + 0x00b9: 0x03c0, # GREEK SMALL LETTER PI + 0x00ba: 0x222b, # INTEGRAL + 0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00bd: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00be: 0x00e6, # LATIN SMALL LETTER AE + 0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00c0: 0x00bf, # INVERTED QUESTION MARK + 0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00c2: 0x00ac, # NOT SIGN + 0x00c3: 0x221a, # SQUARE ROOT + 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00c5: 0x2248, # ALMOST EQUAL TO + 0x00c6: 0x2206, # INCREMENT + 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS + 0x00ca: 0x00a0, # NO-BREAK SPACE + 0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00ce: 0x0152, # LATIN CAPITAL 
LIGATURE OE + 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE + 0x00d0: 0x2013, # EN DASH + 0x00d1: 0x2014, # EM DASH + 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00d6: 0x00f7, # DIVISION SIGN + 0x00d7: 0x25ca, # LOZENGE + 0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x00da: 0x2044, # FRACTION SLASH + 0x00db: 0x20ac, # EURO SIGN + 0x00dc: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x00dd: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x00de: 0xfb01, # LATIN SMALL LIGATURE FI + 0x00df: 0xfb02, # LATIN SMALL LIGATURE FL + 0x00e0: 0x2021, # DOUBLE DAGGER + 0x00e1: 0x00b7, # MIDDLE DOT + 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x00e4: 0x2030, # PER MILLE SIGN + 0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00e8: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00f0: 0xf8ff, # Apple logo + 0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00f5: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x00f7: 0x02dc, # SMALL TILDE + 0x00f8: 0x00af, # MACRON + 0x00f9: 
0x02d8, # BREVE + 0x00fa: 0x02d9, # DOT ABOVE + 0x00fb: 0x02da, # RING ABOVE + 0x00fc: 0x00b8, # CEDILLA + 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT + 0x00fe: 0x02db, # OGONEK + 0x00ff: 0x02c7, # CARON }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> CONTROL CHARACTER + u'\x01' # 0x0001 -> CONTROL CHARACTER + u'\x02' # 0x0002 -> CONTROL CHARACTER + u'\x03' # 0x0003 -> CONTROL CHARACTER + u'\x04' # 0x0004 -> CONTROL CHARACTER + u'\x05' # 0x0005 -> CONTROL CHARACTER + u'\x06' # 0x0006 -> CONTROL CHARACTER + u'\x07' # 0x0007 -> CONTROL CHARACTER + u'\x08' # 0x0008 -> CONTROL CHARACTER + u'\t' # 0x0009 -> CONTROL CHARACTER + u'\n' # 0x000a -> CONTROL CHARACTER + u'\x0b' # 0x000b -> CONTROL CHARACTER + u'\x0c' # 0x000c -> CONTROL CHARACTER + u'\r' # 0x000d -> CONTROL CHARACTER + u'\x0e' # 0x000e -> CONTROL CHARACTER + u'\x0f' # 0x000f -> CONTROL CHARACTER + u'\x10' # 0x0010 -> CONTROL CHARACTER + u'\x11' # 0x0011 -> CONTROL CHARACTER + u'\x12' # 0x0012 -> CONTROL CHARACTER + u'\x13' # 0x0013 -> CONTROL CHARACTER + u'\x14' # 0x0014 -> CONTROL CHARACTER + u'\x15' # 0x0015 -> CONTROL CHARACTER + u'\x16' # 0x0016 -> CONTROL CHARACTER + u'\x17' # 0x0017 -> CONTROL CHARACTER + u'\x18' # 0x0018 -> CONTROL CHARACTER + u'\x19' # 0x0019 -> CONTROL CHARACTER + u'\x1a' # 0x001a -> CONTROL CHARACTER + u'\x1b' # 0x001b -> CONTROL CHARACTER + u'\x1c' # 0x001c -> CONTROL CHARACTER + u'\x1d' # 0x001d -> CONTROL CHARACTER + u'\x1e' # 0x001e -> CONTROL CHARACTER + u'\x1f' # 0x001f -> CONTROL CHARACTER + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> CONTROL CHARACTER + u'\xc4' # 0x0080 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x0081 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc7' # 0x0082 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc9' # 0x0083 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xd1' # 0x0084 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd6' # 0x0085 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x0086 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xe1' # 0x0087 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe0' # 0x0088 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe2' # 0x0089 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x008a -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe3' # 0x008b -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x008c -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x008d -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x008e -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x008f -> LATIN SMALL LETTER E WITH 
GRAVE + u'\xea' # 0x0090 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0091 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xed' # 0x0092 -> LATIN SMALL LETTER I WITH ACUTE + u'\xec' # 0x0093 -> LATIN SMALL LETTER I WITH GRAVE + u'\xee' # 0x0094 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x0095 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf1' # 0x0096 -> LATIN SMALL LETTER N WITH TILDE + u'\xf3' # 0x0097 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf2' # 0x0098 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf4' # 0x0099 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x009a -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf5' # 0x009b -> LATIN SMALL LETTER O WITH TILDE + u'\xfa' # 0x009c -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x009d -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x009e -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x009f -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u2020' # 0x00a0 -> DAGGER + u'\xb0' # 0x00a1 -> DEGREE SIGN + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa7' # 0x00a4 -> SECTION SIGN + u'\u2022' # 0x00a5 -> BULLET + u'\xb6' # 0x00a6 -> PILCROW SIGN + u'\xdf' # 0x00a7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0x00a8 -> REGISTERED SIGN + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u2122' # 0x00aa -> TRADE MARK SIGN + u'\xb4' # 0x00ab -> ACUTE ACCENT + u'\xa8' # 0x00ac -> DIAERESIS + u'\u2260' # 0x00ad -> NOT EQUAL TO + u'\xc6' # 0x00ae -> LATIN CAPITAL LETTER AE + u'\xd8' # 0x00af -> LATIN CAPITAL LETTER O WITH STROKE + u'\u221e' # 0x00b0 -> INFINITY + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\u2264' # 0x00b2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0x00b3 -> GREATER-THAN OR EQUAL TO + u'\xa5' # 0x00b4 -> YEN SIGN + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\u2202' # 0x00b6 -> PARTIAL DIFFERENTIAL + u'\u2211' # 0x00b7 -> N-ARY SUMMATION + u'\u220f' # 0x00b8 -> N-ARY PRODUCT + u'\u03c0' # 0x00b9 -> GREEK SMALL LETTER PI + u'\u222b' # 0x00ba -> INTEGRAL + u'\xaa' # 0x00bb -> FEMININE ORDINAL INDICATOR + u'\xba' 
# 0x00bc -> MASCULINE ORDINAL INDICATOR + u'\u03a9' # 0x00bd -> GREEK CAPITAL LETTER OMEGA + u'\xe6' # 0x00be -> LATIN SMALL LETTER AE + u'\xf8' # 0x00bf -> LATIN SMALL LETTER O WITH STROKE + u'\xbf' # 0x00c0 -> INVERTED QUESTION MARK + u'\xa1' # 0x00c1 -> INVERTED EXCLAMATION MARK + u'\xac' # 0x00c2 -> NOT SIGN + u'\u221a' # 0x00c3 -> SQUARE ROOT + u'\u0192' # 0x00c4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0x00c5 -> ALMOST EQUAL TO + u'\u2206' # 0x00c6 -> INCREMENT + u'\xab' # 0x00c7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00c8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0x00c9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0x00ca -> NO-BREAK SPACE + u'\xc0' # 0x00cb -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc3' # 0x00cc -> LATIN CAPITAL LETTER A WITH TILDE + u'\xd5' # 0x00cd -> LATIN CAPITAL LETTER O WITH TILDE + u'\u0152' # 0x00ce -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0x00cf -> LATIN SMALL LIGATURE OE + u'\u2013' # 0x00d0 -> EN DASH + u'\u2014' # 0x00d1 -> EM DASH + u'\u201c' # 0x00d2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x00d3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0x00d4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x00d5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0x00d6 -> DIVISION SIGN + u'\u25ca' # 0x00d7 -> LOZENGE + u'\xff' # 0x00d8 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\u0178' # 0x00d9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u2044' # 0x00da -> FRACTION SLASH + u'\u20ac' # 0x00db -> EURO SIGN + u'\u2039' # 0x00dc -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u203a' # 0x00dd -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\ufb01' # 0x00de -> LATIN SMALL LIGATURE FI + u'\ufb02' # 0x00df -> LATIN SMALL LIGATURE FL + u'\u2021' # 0x00e0 -> DOUBLE DAGGER + u'\xb7' # 0x00e1 -> MIDDLE DOT + u'\u201a' # 0x00e2 -> SINGLE LOW-9 QUOTATION MARK + u'\u201e' # 0x00e3 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2030' # 0x00e4 -> PER MILLE SIGN + u'\xc2' # 0x00e5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xca' 
# 0x00e6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xc1' # 0x00e7 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xcb' # 0x00e8 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0x00e9 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0x00ea -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00eb -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00ec -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0x00ed -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd3' # 0x00ee -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00ef -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\uf8ff' # 0x00f0 -> Apple logo + u'\xd2' # 0x00f1 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xda' # 0x00f2 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00f3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0x00f4 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\u0131' # 0x00f5 -> LATIN SMALL LETTER DOTLESS I + u'\u02c6' # 0x00f6 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u02dc' # 0x00f7 -> SMALL TILDE + u'\xaf' # 0x00f8 -> MACRON + u'\u02d8' # 0x00f9 -> BREVE + u'\u02d9' # 0x00fa -> DOT ABOVE + u'\u02da' # 0x00fb -> RING ABOVE + u'\xb8' # 0x00fc -> CEDILLA + u'\u02dd' # 0x00fd -> DOUBLE ACUTE ACCENT + u'\u02db' # 0x00fe -> OGONEK + u'\u02c7' # 0x00ff -> CARON +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # CONTROL CHARACTER + 0x0001: 0x0001, # CONTROL CHARACTER + 0x0002: 0x0002, # CONTROL CHARACTER + 0x0003: 0x0003, # CONTROL CHARACTER + 0x0004: 0x0004, # CONTROL CHARACTER + 0x0005: 0x0005, # CONTROL CHARACTER + 0x0006: 0x0006, # CONTROL CHARACTER + 0x0007: 0x0007, # CONTROL CHARACTER + 0x0008: 0x0008, # CONTROL CHARACTER + 0x0009: 0x0009, # CONTROL CHARACTER + 0x000a: 0x000a, # CONTROL CHARACTER + 0x000b: 0x000b, # CONTROL CHARACTER + 0x000c: 0x000c, # CONTROL CHARACTER + 0x000d: 0x000d, # CONTROL CHARACTER + 0x000e: 0x000e, # CONTROL CHARACTER + 0x000f: 0x000f, # CONTROL CHARACTER + 0x0010: 0x0010, # CONTROL CHARACTER + 0x0011: 
0x0011, # CONTROL CHARACTER + 0x0012: 0x0012, # CONTROL CHARACTER + 0x0013: 0x0013, # CONTROL CHARACTER + 0x0014: 0x0014, # CONTROL CHARACTER + 0x0015: 0x0015, # CONTROL CHARACTER + 0x0016: 0x0016, # CONTROL CHARACTER + 0x0017: 0x0017, # CONTROL CHARACTER + 0x0018: 0x0018, # CONTROL CHARACTER + 0x0019: 0x0019, # CONTROL CHARACTER + 0x001a: 0x001a, # CONTROL CHARACTER + 0x001b: 0x001b, # CONTROL CHARACTER + 0x001c: 0x001c, # CONTROL CHARACTER + 0x001d: 0x001d, # CONTROL CHARACTER + 0x001e: 0x001e, # CONTROL CHARACTER + 0x001f: 0x001f, # CONTROL CHARACTER + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 
0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN 
SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # CONTROL CHARACTER + 0x00a0: 0x00ca, # NO-BREAK SPACE + 0x00a1: 0x00c1, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a5: 0x00b4, # YEN SIGN + 0x00a7: 0x00a4, # SECTION SIGN + 0x00a8: 0x00ac, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00aa: 0x00bb, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00c7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00c2, # NOT SIGN + 0x00ae: 0x00a8, # REGISTERED SIGN + 0x00af: 0x00f8, # MACRON + 0x00b0: 0x00a1, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b4: 0x00ab, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00a6, # PILCROW SIGN + 0x00b7: 0x00e1, # MIDDLE DOT + 0x00b8: 0x00fc, # CEDILLA + 0x00ba: 0x00bc, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00c8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bf: 0x00c0, # INVERTED QUESTION MARK + 0x00c0: 0x00cb, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00e7, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00e5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00cc, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x0080, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x0081, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00ae, # LATIN CAPITAL LETTER AE + 0x00c7: 0x0082, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00e9, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x0083, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00e6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00e8, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00ed, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00ea, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00eb, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00ec, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d1: 0x0084, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00f1, 
# LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00ee, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00ef, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00cd, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x0085, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d8: 0x00af, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00f4, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00f2, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00f3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x0086, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00a7, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0088, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x0087, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0089, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x008b, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x008a, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x008c, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00be, # LATIN SMALL LETTER AE + 0x00e7: 0x008d, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008f, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x008e, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0090, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0091, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x0093, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x0092, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x0094, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x0095, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x0096, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0098, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x0097, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0099, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x009b, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x009a, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00d6, # DIVISION SIGN + 0x00f8: 0x00bf, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x009d, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x009c, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x009e, # LATIN SMALL LETTER U 
WITH CIRCUMFLEX + 0x00fc: 0x009f, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x00d8, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0131: 0x00f5, # LATIN SMALL LETTER DOTLESS I + 0x0152: 0x00ce, # LATIN CAPITAL LIGATURE OE + 0x0153: 0x00cf, # LATIN SMALL LIGATURE OE + 0x0178: 0x00d9, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0192: 0x00c4, # LATIN SMALL LETTER F WITH HOOK + 0x02c6: 0x00f6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02c7: 0x00ff, # CARON + 0x02d8: 0x00f9, # BREVE + 0x02d9: 0x00fa, # DOT ABOVE + 0x02da: 0x00fb, # RING ABOVE + 0x02db: 0x00fe, # OGONEK + 0x02dc: 0x00f7, # SMALL TILDE + 0x02dd: 0x00fd, # DOUBLE ACUTE ACCENT + 0x03a9: 0x00bd, # GREEK CAPITAL LETTER OMEGA + 0x03c0: 0x00b9, # GREEK SMALL LETTER PI + 0x2013: 0x00d0, # EN DASH + 0x2014: 0x00d1, # EM DASH + 0x2018: 0x00d4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x00d5, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x00e2, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x00d2, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x00d3, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x00e3, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x00a0, # DAGGER + 0x2021: 0x00e0, # DOUBLE DAGGER + 0x2022: 0x00a5, # BULLET + 0x2026: 0x00c9, # HORIZONTAL ELLIPSIS + 0x2030: 0x00e4, # PER MILLE SIGN + 0x2039: 0x00dc, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x00dd, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x2044: 0x00da, # FRACTION SLASH + 0x20ac: 0x00db, # EURO SIGN + 0x2122: 0x00aa, # TRADE MARK SIGN + 0x2202: 0x00b6, # PARTIAL DIFFERENTIAL + 0x2206: 0x00c6, # INCREMENT + 0x220f: 0x00b8, # N-ARY PRODUCT + 0x2211: 0x00b7, # N-ARY SUMMATION + 0x221a: 0x00c3, # SQUARE ROOT + 0x221e: 0x00b0, # INFINITY + 0x222b: 0x00ba, # INTEGRAL + 0x2248: 0x00c5, # ALMOST EQUAL TO + 0x2260: 0x00ad, # NOT EQUAL TO + 0x2264: 0x00b2, # LESS-THAN OR EQUAL TO + 0x2265: 0x00b3, # GREATER-THAN OR EQUAL TO + 0x25ca: 0x00d7, # LOZENGE + 0xf8ff: 0x00f0, # Apple logo + 0xfb01: 0x00de, # LATIN SMALL LIGATURE FI + 0xfb02: 0x00df, # LATIN SMALL LIGATURE FL +}
\ No newline at end of file diff --git a/Lib/encodings/mac_turkish.py b/Lib/encodings/mac_turkish.py index 7f66f50..a404b35 100644 --- a/Lib/encodings/mac_turkish.py +++ b/Lib/encodings/mac_turkish.py @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'TURKISH.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/APPLE/TURKISH.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,131 +32,649 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x0093: 0x00ec, # LATIN 
SMALL LETTER I WITH GRAVE - 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00a0: 0x2020, # DAGGER - 0x00a1: 0x00b0, # DEGREE SIGN - 0x00a4: 0x00a7, # SECTION SIGN - 0x00a5: 0x2022, # BULLET - 0x00a6: 0x00b6, # PILCROW SIGN - 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00a8: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x2122, # TRADE MARK SIGN - 0x00ab: 0x00b4, # ACUTE ACCENT - 0x00ac: 0x00a8, # DIAERESIS - 0x00ad: 0x2260, # NOT EQUAL TO - 0x00ae: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00b0: 0x221e, # INFINITY - 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO - 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00b4: 0x00a5, # YEN SIGN - 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL - 0x00b7: 0x2211, # N-ARY SUMMATION - 0x00b8: 0x220f, # N-ARY PRODUCT - 0x00b9: 0x03c0, # GREEK SMALL LETTER PI - 0x00ba: 0x222b, # INTEGRAL - 0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00bd: 0x2126, # OHM SIGN - 0x00be: 0x00e6, # LATIN SMALL LIGATURE AE - 0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x00c0: 0x00bf, # INVERTED QUESTION MARK - 0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00c2: 0x00ac, # NOT SIGN - 0x00c3: 0x221a, # SQUARE ROOT - 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00c5: 0x2248, # ALMOST EQUAL TO - 0x00c6: 0x2206, # INCREMENT - 0x00c7: 0x00ab, # LEFT-POINTING 
DOUBLE ANGLE QUOTATION MARK - 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS - 0x00ca: 0x00a0, # NO-BREAK SPACE - 0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00ce: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE - 0x00d0: 0x2013, # EN DASH - 0x00d1: 0x2014, # EM DASH - 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00d6: 0x00f7, # DIVISION SIGN - 0x00d7: 0x25ca, # LOZENGE - 0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x00da: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE - 0x00db: 0x011f, # LATIN SMALL LETTER G WITH BREVE - 0x00dc: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x00dd: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x00df: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA - 0x00e0: 0x2021, # DOUBLE DAGGER - 0x00e1: 0x00b7, # MIDDLE DOT - 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00e4: 0x2030, # PER MILLE SIGN - 0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00e8: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00f0: None, # 
UNDEFINED - 0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00f5: None, # UNDEFINED - 0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x00f7: 0x02dc, # SMALL TILDE - 0x00f8: 0x00af, # MACRON - 0x00f9: 0x02d8, # BREVE - 0x00fa: 0x02d9, # DOT ABOVE - 0x00fb: 0x02da, # RING ABOVE - 0x00fc: 0x00b8, # CEDILLA - 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT - 0x00fe: 0x02db, # OGONEK - 0x00ff: 0x02c7, # CARON + 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x0093: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH 
CIRCUMFLEX + 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00a0: 0x2020, # DAGGER + 0x00a1: 0x00b0, # DEGREE SIGN + 0x00a4: 0x00a7, # SECTION SIGN + 0x00a5: 0x2022, # BULLET + 0x00a6: 0x00b6, # PILCROW SIGN + 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00a8: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x2122, # TRADE MARK SIGN + 0x00ab: 0x00b4, # ACUTE ACCENT + 0x00ac: 0x00a8, # DIAERESIS + 0x00ad: 0x2260, # NOT EQUAL TO + 0x00ae: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00b0: 0x221e, # INFINITY + 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO + 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00b4: 0x00a5, # YEN SIGN + 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL + 0x00b7: 0x2211, # N-ARY SUMMATION + 0x00b8: 0x220f, # N-ARY PRODUCT + 0x00b9: 0x03c0, # GREEK SMALL LETTER PI + 0x00ba: 0x222b, # INTEGRAL + 0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00bd: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00be: 0x00e6, # LATIN SMALL LETTER AE + 0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00c0: 0x00bf, # INVERTED QUESTION MARK + 0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00c2: 0x00ac, # NOT SIGN + 0x00c3: 0x221a, # SQUARE ROOT + 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00c5: 0x2248, # ALMOST EQUAL TO + 0x00c6: 0x2206, # INCREMENT + 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS + 0x00ca: 0x00a0, # NO-BREAK SPACE + 0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 
0x00ce: 0x0152, # LATIN CAPITAL LIGATURE OE + 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE + 0x00d0: 0x2013, # EN DASH + 0x00d1: 0x2014, # EM DASH + 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00d6: 0x00f7, # DIVISION SIGN + 0x00d7: 0x25ca, # LOZENGE + 0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x00da: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE + 0x00db: 0x011f, # LATIN SMALL LETTER G WITH BREVE + 0x00dc: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x00dd: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x00df: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00e0: 0x2021, # DOUBLE DAGGER + 0x00e1: 0x00b7, # MIDDLE DOT + 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x00e4: 0x2030, # PER MILLE SIGN + 0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00e8: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00f0: 0xf8ff, # Apple logo + 0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00f5: 0xf8a0, # undefined1 + 0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x00f7: 
0x02dc, # SMALL TILDE + 0x00f8: 0x00af, # MACRON + 0x00f9: 0x02d8, # BREVE + 0x00fa: 0x02d9, # DOT ABOVE + 0x00fb: 0x02da, # RING ABOVE + 0x00fc: 0x00b8, # CEDILLA + 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT + 0x00fe: 0x02db, # OGONEK + 0x00ff: 0x02c7, # CARON }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> CONTROL CHARACTER + u'\x01' # 0x0001 -> CONTROL CHARACTER + u'\x02' # 0x0002 -> CONTROL CHARACTER + u'\x03' # 0x0003 -> CONTROL CHARACTER + u'\x04' # 0x0004 -> CONTROL CHARACTER + u'\x05' # 0x0005 -> CONTROL CHARACTER + u'\x06' # 0x0006 -> CONTROL CHARACTER + u'\x07' # 0x0007 -> CONTROL CHARACTER + u'\x08' # 0x0008 -> CONTROL CHARACTER + u'\t' # 0x0009 -> CONTROL CHARACTER + u'\n' # 0x000a -> CONTROL CHARACTER + u'\x0b' # 0x000b -> CONTROL CHARACTER + u'\x0c' # 0x000c -> CONTROL CHARACTER + u'\r' # 0x000d -> CONTROL CHARACTER + u'\x0e' # 0x000e -> CONTROL CHARACTER + u'\x0f' # 0x000f -> CONTROL CHARACTER + u'\x10' # 0x0010 -> CONTROL CHARACTER + u'\x11' # 0x0011 -> CONTROL CHARACTER + u'\x12' # 0x0012 -> CONTROL CHARACTER + u'\x13' # 0x0013 -> CONTROL CHARACTER + u'\x14' # 0x0014 -> CONTROL CHARACTER + u'\x15' # 0x0015 -> CONTROL CHARACTER + u'\x16' # 0x0016 -> CONTROL CHARACTER + u'\x17' # 0x0017 -> CONTROL CHARACTER + u'\x18' # 0x0018 -> CONTROL CHARACTER + u'\x19' # 0x0019 -> CONTROL CHARACTER + u'\x1a' # 0x001a -> CONTROL CHARACTER + u'\x1b' # 0x001b -> CONTROL CHARACTER + u'\x1c' # 0x001c -> CONTROL CHARACTER + u'\x1d' # 0x001d -> CONTROL CHARACTER + u'\x1e' # 0x001e -> CONTROL CHARACTER + u'\x1f' # 0x001f -> CONTROL CHARACTER + u' ' # 0x0020 -> SPACE + u'!' 
# 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 
0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> CONTROL CHARACTER + u'\xc4' # 0x0080 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x0081 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc7' # 0x0082 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc9' # 0x0083 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xd1' # 0x0084 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd6' # 0x0085 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x0086 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xe1' # 0x0087 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe0' # 0x0088 -> LATIN 
SMALL LETTER A WITH GRAVE + u'\xe2' # 0x0089 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x008a -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe3' # 0x008b -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x008c -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x008d -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x008e -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x008f -> LATIN SMALL LETTER E WITH GRAVE + u'\xea' # 0x0090 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0091 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xed' # 0x0092 -> LATIN SMALL LETTER I WITH ACUTE + u'\xec' # 0x0093 -> LATIN SMALL LETTER I WITH GRAVE + u'\xee' # 0x0094 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x0095 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf1' # 0x0096 -> LATIN SMALL LETTER N WITH TILDE + u'\xf3' # 0x0097 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf2' # 0x0098 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf4' # 0x0099 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x009a -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf5' # 0x009b -> LATIN SMALL LETTER O WITH TILDE + u'\xfa' # 0x009c -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x009d -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x009e -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x009f -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u2020' # 0x00a0 -> DAGGER + u'\xb0' # 0x00a1 -> DEGREE SIGN + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa7' # 0x00a4 -> SECTION SIGN + u'\u2022' # 0x00a5 -> BULLET + u'\xb6' # 0x00a6 -> PILCROW SIGN + u'\xdf' # 0x00a7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0x00a8 -> REGISTERED SIGN + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u2122' # 0x00aa -> TRADE MARK SIGN + u'\xb4' # 0x00ab -> ACUTE ACCENT + u'\xa8' # 0x00ac -> DIAERESIS + u'\u2260' # 0x00ad -> NOT EQUAL TO + u'\xc6' # 0x00ae -> LATIN CAPITAL LETTER AE + u'\xd8' # 0x00af -> LATIN CAPITAL LETTER O WITH STROKE + u'\u221e' # 0x00b0 -> INFINITY + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + 
u'\u2264' # 0x00b2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0x00b3 -> GREATER-THAN OR EQUAL TO + u'\xa5' # 0x00b4 -> YEN SIGN + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\u2202' # 0x00b6 -> PARTIAL DIFFERENTIAL + u'\u2211' # 0x00b7 -> N-ARY SUMMATION + u'\u220f' # 0x00b8 -> N-ARY PRODUCT + u'\u03c0' # 0x00b9 -> GREEK SMALL LETTER PI + u'\u222b' # 0x00ba -> INTEGRAL + u'\xaa' # 0x00bb -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x00bc -> MASCULINE ORDINAL INDICATOR + u'\u03a9' # 0x00bd -> GREEK CAPITAL LETTER OMEGA + u'\xe6' # 0x00be -> LATIN SMALL LETTER AE + u'\xf8' # 0x00bf -> LATIN SMALL LETTER O WITH STROKE + u'\xbf' # 0x00c0 -> INVERTED QUESTION MARK + u'\xa1' # 0x00c1 -> INVERTED EXCLAMATION MARK + u'\xac' # 0x00c2 -> NOT SIGN + u'\u221a' # 0x00c3 -> SQUARE ROOT + u'\u0192' # 0x00c4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0x00c5 -> ALMOST EQUAL TO + u'\u2206' # 0x00c6 -> INCREMENT + u'\xab' # 0x00c7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00c8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0x00c9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0x00ca -> NO-BREAK SPACE + u'\xc0' # 0x00cb -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc3' # 0x00cc -> LATIN CAPITAL LETTER A WITH TILDE + u'\xd5' # 0x00cd -> LATIN CAPITAL LETTER O WITH TILDE + u'\u0152' # 0x00ce -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0x00cf -> LATIN SMALL LIGATURE OE + u'\u2013' # 0x00d0 -> EN DASH + u'\u2014' # 0x00d1 -> EM DASH + u'\u201c' # 0x00d2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x00d3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0x00d4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x00d5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0x00d6 -> DIVISION SIGN + u'\u25ca' # 0x00d7 -> LOZENGE + u'\xff' # 0x00d8 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\u0178' # 0x00d9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u011e' # 0x00da -> LATIN CAPITAL LETTER G WITH BREVE + u'\u011f' # 0x00db -> LATIN SMALL LETTER G WITH BREVE + u'\u0130' # 0x00dc -> LATIN CAPITAL LETTER I WITH 
DOT ABOVE + u'\u0131' # 0x00dd -> LATIN SMALL LETTER DOTLESS I + u'\u015e' # 0x00de -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\u015f' # 0x00df -> LATIN SMALL LETTER S WITH CEDILLA + u'\u2021' # 0x00e0 -> DOUBLE DAGGER + u'\xb7' # 0x00e1 -> MIDDLE DOT + u'\u201a' # 0x00e2 -> SINGLE LOW-9 QUOTATION MARK + u'\u201e' # 0x00e3 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2030' # 0x00e4 -> PER MILLE SIGN + u'\xc2' # 0x00e5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xca' # 0x00e6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xc1' # 0x00e7 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xcb' # 0x00e8 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0x00e9 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0x00ea -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00eb -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00ec -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0x00ed -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd3' # 0x00ee -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00ef -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\uf8ff' # 0x00f0 -> Apple logo + u'\xd2' # 0x00f1 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xda' # 0x00f2 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00f3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0x00f4 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\uf8a0' # 0x00f5 -> undefined1 + u'\u02c6' # 0x00f6 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u02dc' # 0x00f7 -> SMALL TILDE + u'\xaf' # 0x00f8 -> MACRON + u'\u02d8' # 0x00f9 -> BREVE + u'\u02d9' # 0x00fa -> DOT ABOVE + u'\u02da' # 0x00fb -> RING ABOVE + u'\xb8' # 0x00fc -> CEDILLA + u'\u02dd' # 0x00fd -> DOUBLE ACUTE ACCENT + u'\u02db' # 0x00fe -> OGONEK + u'\u02c7' # 0x00ff -> CARON +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # CONTROL CHARACTER + 0x0001: 0x0001, # CONTROL CHARACTER + 0x0002: 0x0002, # CONTROL CHARACTER + 0x0003: 0x0003, # CONTROL CHARACTER + 0x0004: 0x0004, # CONTROL CHARACTER + 0x0005: 0x0005, # 
CONTROL CHARACTER + 0x0006: 0x0006, # CONTROL CHARACTER + 0x0007: 0x0007, # CONTROL CHARACTER + 0x0008: 0x0008, # CONTROL CHARACTER + 0x0009: 0x0009, # CONTROL CHARACTER + 0x000a: 0x000a, # CONTROL CHARACTER + 0x000b: 0x000b, # CONTROL CHARACTER + 0x000c: 0x000c, # CONTROL CHARACTER + 0x000d: 0x000d, # CONTROL CHARACTER + 0x000e: 0x000e, # CONTROL CHARACTER + 0x000f: 0x000f, # CONTROL CHARACTER + 0x0010: 0x0010, # CONTROL CHARACTER + 0x0011: 0x0011, # CONTROL CHARACTER + 0x0012: 0x0012, # CONTROL CHARACTER + 0x0013: 0x0013, # CONTROL CHARACTER + 0x0014: 0x0014, # CONTROL CHARACTER + 0x0015: 0x0015, # CONTROL CHARACTER + 0x0016: 0x0016, # CONTROL CHARACTER + 0x0017: 0x0017, # CONTROL CHARACTER + 0x0018: 0x0018, # CONTROL CHARACTER + 0x0019: 0x0019, # CONTROL CHARACTER + 0x001a: 0x001a, # CONTROL CHARACTER + 0x001b: 0x001b, # CONTROL CHARACTER + 0x001c: 0x001c, # CONTROL CHARACTER + 0x001d: 0x001d, # CONTROL CHARACTER + 0x001e: 0x001e, # CONTROL CHARACTER + 0x001f: 0x001f, # CONTROL CHARACTER + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 
0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL 
LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # CONTROL CHARACTER + 0x00a0: 0x00ca, # NO-BREAK SPACE + 0x00a1: 0x00c1, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a5: 0x00b4, # YEN SIGN + 0x00a7: 0x00a4, # SECTION SIGN + 0x00a8: 0x00ac, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00aa: 0x00bb, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00c7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00c2, # NOT SIGN + 0x00ae: 0x00a8, # REGISTERED SIGN + 0x00af: 0x00f8, # MACRON + 0x00b0: 0x00a1, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b4: 0x00ab, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00a6, # PILCROW SIGN + 0x00b7: 0x00e1, # MIDDLE DOT + 0x00b8: 0x00fc, # CEDILLA + 0x00ba: 0x00bc, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00c8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bf: 0x00c0, # INVERTED QUESTION MARK + 0x00c0: 0x00cb, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00e7, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00e5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00cc, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x0080, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x0081, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00ae, # LATIN CAPITAL LETTER AE + 0x00c7: 0x0082, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00e9, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x0083, # 
LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00e6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00e8, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00ed, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00ea, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00eb, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00ec, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d1: 0x0084, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00f1, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00ee, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00ef, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00cd, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x0085, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d8: 0x00af, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00f4, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00f2, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00f3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x0086, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00a7, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0088, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x0087, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0089, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x008b, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x008a, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x008c, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00be, # LATIN SMALL LETTER AE + 0x00e7: 0x008d, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008f, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x008e, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0090, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0091, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x0093, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x0092, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x0094, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x0095, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x0096, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0098, # LATIN SMALL LETTER O WITH GRAVE + 
0x00f3: 0x0097, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0099, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x009b, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x009a, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00d6, # DIVISION SIGN + 0x00f8: 0x00bf, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x009d, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x009c, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x009e, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x009f, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x00d8, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x011e: 0x00da, # LATIN CAPITAL LETTER G WITH BREVE + 0x011f: 0x00db, # LATIN SMALL LETTER G WITH BREVE + 0x0130: 0x00dc, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0x00dd, # LATIN SMALL LETTER DOTLESS I + 0x0152: 0x00ce, # LATIN CAPITAL LIGATURE OE + 0x0153: 0x00cf, # LATIN SMALL LIGATURE OE + 0x015e: 0x00de, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015f: 0x00df, # LATIN SMALL LETTER S WITH CEDILLA + 0x0178: 0x00d9, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0192: 0x00c4, # LATIN SMALL LETTER F WITH HOOK + 0x02c6: 0x00f6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02c7: 0x00ff, # CARON + 0x02d8: 0x00f9, # BREVE + 0x02d9: 0x00fa, # DOT ABOVE + 0x02da: 0x00fb, # RING ABOVE + 0x02db: 0x00fe, # OGONEK + 0x02dc: 0x00f7, # SMALL TILDE + 0x02dd: 0x00fd, # DOUBLE ACUTE ACCENT + 0x03a9: 0x00bd, # GREEK CAPITAL LETTER OMEGA + 0x03c0: 0x00b9, # GREEK SMALL LETTER PI + 0x2013: 0x00d0, # EN DASH + 0x2014: 0x00d1, # EM DASH + 0x2018: 0x00d4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x00d5, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x00e2, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x00d2, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x00d3, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x00e3, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x00a0, # DAGGER + 0x2021: 0x00e0, # DOUBLE DAGGER + 0x2022: 0x00a5, # BULLET + 0x2026: 0x00c9, # HORIZONTAL ELLIPSIS + 0x2030: 0x00e4, # PER MILLE SIGN + 0x2122: 0x00aa, # TRADE MARK SIGN + 
0x2202: 0x00b6, # PARTIAL DIFFERENTIAL + 0x2206: 0x00c6, # INCREMENT + 0x220f: 0x00b8, # N-ARY PRODUCT + 0x2211: 0x00b7, # N-ARY SUMMATION + 0x221a: 0x00c3, # SQUARE ROOT + 0x221e: 0x00b0, # INFINITY + 0x222b: 0x00ba, # INTEGRAL + 0x2248: 0x00c5, # ALMOST EQUAL TO + 0x2260: 0x00ad, # NOT EQUAL TO + 0x2264: 0x00b2, # LESS-THAN OR EQUAL TO + 0x2265: 0x00b3, # GREATER-THAN OR EQUAL TO + 0x25ca: 0x00d7, # LOZENGE + 0xf8a0: 0x00f5, # undefined1 + 0xf8ff: 0x00f0, # Apple logo +}
\ No newline at end of file |