diff options
56 files changed, 424 insertions, 293 deletions
diff --git a/Lib/codecs.py b/Lib/codecs.py index fca0f8e..9931137 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -539,6 +539,21 @@ def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'): sr.file_encoding = file_encoding return sr +### Helpers for charmap-based codecs + +def make_identity_dict(rng): + + """ make_identity_dict(rng) -> dict + + Return a dictionary where elements of the rng sequence are + mapped to themselves. + + """ + res = {} + for i in rng: + res[i]=i + return res + ### Tests if __name__ == '__main__': diff --git a/Lib/encodings/cp037.py b/Lib/encodings/cp037.py index d60504c..5868372 100644 --- a/Lib/encodings/cp037.py +++ b/Lib/encodings/cp037.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP037.TXT'. - +""" Python Character Mapping Codec generated from 'CP037.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0004: 0x009c, # CONTROL 0x0005: 0x0009, # HORIZONTAL TABULATION 0x0006: 0x0086, # CONTROL @@ -273,7 +273,7 @@ decoding_map = { 0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE 0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE 0x00ff: 0x009f, # CONTROL -} +}) ### Encoding Map diff --git a/Lib/encodings/cp1006.py b/Lib/encodings/cp1006.py index 991feed..593fbb6 100644 --- a/Lib/encodings/cp1006.py +++ b/Lib/encodings/cp1006.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP1006.TXT'. - +""" Python Character Mapping Codec generated from 'CP1006.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x00a1: 0x06f0, # EXTENDED ARABIC-INDIC DIGIT ZERO 0x00a2: 0x06f1, # EXTENDED ARABIC-INDIC DIGIT ONE 0x00a3: 0x06f2, # EXTENDED ARABIC-INDIC DIGIT TWO @@ -131,7 +131,7 @@ decoding_map = { 0x00fd: 0xfbae, # ARABIC LETTER YEH BARREE ISOLATED FORM 0x00fe: 0xfe7c, # ARABIC SHADDA ISOLATED FORM 0x00ff: 0xfe7d, # ARABIC SHADDA MEDIAL FORM -} +}) ### Encoding Map diff --git a/Lib/encodings/cp1026.py b/Lib/encodings/cp1026.py index ae8086f..3796a75 100644 --- a/Lib/encodings/cp1026.py +++ b/Lib/encodings/cp1026.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP1026.TXT'. - +""" Python Character Mapping Codec generated from 'CP1026.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0004: 0x009c, # CONTROL 0x0005: 0x0009, # HORIZONTAL TABULATION 0x0006: 0x0086, # CONTROL @@ -273,7 +273,7 @@ decoding_map = { 0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE 0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE 0x00ff: 0x009f, # CONTROL -} +}) ### Encoding Map diff --git a/Lib/encodings/cp1250.py b/Lib/encodings/cp1250.py index d1276c4..03a3e31 100644 --- a/Lib/encodings/cp1250.py +++ b/Lib/encodings/cp1250.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP1250.TXT'. - +""" Python Character Mapping Codec generated from 'CP1250.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: None, # UNDEFINED 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK @@ -116,7 +116,7 @@ decoding_map = { 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE 0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA 0x00ff: 0x02d9, # DOT ABOVE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp1251.py b/Lib/encodings/cp1251.py index 42921e4..e27a122 100644 --- a/Lib/encodings/cp1251.py +++ b/Lib/encodings/cp1251.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP1251.TXT'. - +""" Python Character Mapping Codec generated from 'CP1251.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x0402, # CYRILLIC CAPITAL LETTER DJE 0x0081: 0x0403, # CYRILLIC CAPITAL LETTER GJE 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK @@ -150,7 +150,7 @@ decoding_map = { 0x00fd: 0x044d, # CYRILLIC SMALL LETTER E 0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU 0x00ff: 0x044f, # CYRILLIC SMALL LETTER YA -} +}) ### Encoding Map diff --git a/Lib/encodings/cp1252.py b/Lib/encodings/cp1252.py index 07a5358..5d7bdd6 100644 --- a/Lib/encodings/cp1252.py +++ b/Lib/encodings/cp1252.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP1252.TXT'. - +""" Python Character Mapping Codec generated from 'CP1252.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: None, # UNDEFINED 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK @@ -69,7 +69,7 @@ decoding_map = { 0x009d: None, # UNDEFINED 0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS -} +}) ### Encoding Map diff --git a/Lib/encodings/cp1253.py b/Lib/encodings/cp1253.py index c84808a..abc144c 100644 --- a/Lib/encodings/cp1253.py +++ b/Lib/encodings/cp1253.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP1253.TXT'. - +""" Python Character Mapping Codec generated from 'CP1253.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: None, # UNDEFINED 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK @@ -144,7 +144,7 @@ decoding_map = { 0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS 0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS 0x00ff: None, # UNDEFINED -} +}) ### Encoding Map diff --git a/Lib/encodings/cp1254.py b/Lib/encodings/cp1254.py index 9897ecf..4a2ab3c 100644 --- a/Lib/encodings/cp1254.py +++ b/Lib/encodings/cp1254.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP1254.TXT'. - +""" Python Character Mapping Codec generated from 'CP1254.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: None, # UNDEFINED 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK @@ -75,7 +75,7 @@ decoding_map = { 0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE 0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I 0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA -} +}) ### Encoding Map diff --git a/Lib/encodings/cp1255.py b/Lib/encodings/cp1255.py index 5404b46..c987b85 100644 --- a/Lib/encodings/cp1255.py +++ b/Lib/encodings/cp1255.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP1255.TXT'. - +""" Python Character Mapping Codec generated from 'CP1255.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: None, # UNDEFINED 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK @@ -136,7 +136,7 @@ decoding_map = { 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK 0x00ff: None, # UNDEFINED -} +}) ### Encoding Map diff --git a/Lib/encodings/cp1256.py b/Lib/encodings/cp1256.py index 6bb02dd..d72c5bc 100644 --- a/Lib/encodings/cp1256.py +++ b/Lib/encodings/cp1256.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP1256.TXT'. - +""" Python Character Mapping Codec generated from 'CP1256.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: 0x067e, # ARABIC LETTER PEH 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK @@ -122,7 +122,7 @@ decoding_map = { 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK 0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp1257.py b/Lib/encodings/cp1257.py index ded826c..d17a904 100644 --- a/Lib/encodings/cp1257.py +++ b/Lib/encodings/cp1257.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP1257.TXT'. - +""" Python Character Mapping Codec generated from 'CP1257.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: None, # UNDEFINED 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK @@ -124,7 +124,7 @@ decoding_map = { 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE 0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON 0x00ff: 0x02d9, # DOT ABOVE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp1258.py b/Lib/encodings/cp1258.py index 955253c..597f124 100644 --- a/Lib/encodings/cp1258.py +++ b/Lib/encodings/cp1258.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP1258.TXT'. - +""" Python Character Mapping Codec generated from 'CP1258.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: None, # UNDEFINED 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK @@ -83,7 +83,7 @@ decoding_map = { 0x00f5: 0x01a1, # LATIN SMALL LETTER O WITH HORN 0x00fd: 0x01b0, # LATIN SMALL LETTER U WITH HORN 0x00fe: 0x20ab, # DONG SIGN -} +}) ### Encoding Map diff --git a/Lib/encodings/cp424.py b/Lib/encodings/cp424.py index c4abaec..bc10379 100644 --- a/Lib/encodings/cp424.py +++ b/Lib/encodings/cp424.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP424.TXT'. - +""" Python Character Mapping Codec generated from 'CP424.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0004: 0x009c, # SELECT 0x0005: 0x0009, # HORIZONTAL TABULATION 0x0006: 0x0086, # REQUIRED NEW LINE @@ -273,7 +273,7 @@ decoding_map = { 0x00fd: None, # UNDEFINED 0x00fe: None, # UNDEFINED 0x00ff: 0x009f, # EIGHT ONES -} +}) ### Encoding Map diff --git a/Lib/encodings/cp437.py b/Lib/encodings/cp437.py index ca7d90e..db1b88a 100644 --- a/Lib/encodings/cp437.py +++ b/Lib/encodings/cp437.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP437.TXT'. - +""" Python Character Mapping Codec generated from 'CP437.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp500.py b/Lib/encodings/cp500.py index 33d6fed..1c8fb57 100644 --- a/Lib/encodings/cp500.py +++ b/Lib/encodings/cp500.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP500.TXT'. - +""" Python Character Mapping Codec generated from 'CP500.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0004: 0x009c, # CONTROL 0x0005: 0x0009, # HORIZONTAL TABULATION 0x0006: 0x0086, # CONTROL @@ -273,7 +273,7 @@ decoding_map = { 0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE 0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE 0x00ff: 0x009f, # CONTROL -} +}) ### Encoding Map diff --git a/Lib/encodings/cp737.py b/Lib/encodings/cp737.py index e55b3dd..03665ae 100644 --- a/Lib/encodings/cp737.py +++ b/Lib/encodings/cp737.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP737.TXT'. - +""" Python Character Mapping Codec generated from 'CP737.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA 0x0081: 0x0392, # GREEK CAPITAL LETTER BETA 0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp775.py b/Lib/encodings/cp775.py index e43ce2d1..b38ccb5 100644 --- a/Lib/encodings/cp775.py +++ b/Lib/encodings/cp775.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP775.TXT'. - +""" Python Character Mapping Codec generated from 'CP775.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp850.py b/Lib/encodings/cp850.py index cb0918c..e26287b 100644 --- a/Lib/encodings/cp850.py +++ b/Lib/encodings/cp850.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP850.TXT'. - +""" Python Character Mapping Codec generated from 'CP850.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp852.py b/Lib/encodings/cp852.py index ba4f142..431d844 100644 --- a/Lib/encodings/cp852.py +++ b/Lib/encodings/cp852.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP852.TXT'. - +""" Python Character Mapping Codec generated from 'CP852.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp855.py b/Lib/encodings/cp855.py index c967bcf..c9e7168 100644 --- a/Lib/encodings/cp855.py +++ b/Lib/encodings/cp855.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP855.TXT'. - +""" Python Character Mapping Codec generated from 'CP855.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE 0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE 0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x00a7, # SECTION SIGN 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp856.py b/Lib/encodings/cp856.py index f384acb..cc2e01f 100644 --- a/Lib/encodings/cp856.py +++ b/Lib/encodings/cp856.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP856.TXT'. - +""" Python Character Mapping Codec generated from 'CP856.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x05d0, # HEBREW LETTER ALEF 0x0081: 0x05d1, # HEBREW LETTER BET 0x0082: 0x05d2, # HEBREW LETTER GIMEL @@ -120,10 +120,10 @@ decoding_map = { 0x00d0: None, # UNDEFINED 0x00d1: None, # UNDEFINED 0x00d2: None, # UNDEFINED - 0x00d3: None, # UNDEFINED + 0x00d3: None, # UNDEFINEDS 0x00d4: None, # UNDEFINED 0x00d5: None, # UNDEFINED - 0x00d6: None, # UNDEFINED + 0x00d6: None, # UNDEFINEDE 0x00d7: None, # UNDEFINED 0x00d8: None, # UNDEFINED 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp857.py b/Lib/encodings/cp857.py index 49cc685..6f4df23 100644 --- a/Lib/encodings/cp857.py +++ b/Lib/encodings/cp857.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP857.TXT'. 
- +""" Python Character Mapping Codec generated from 'CP857.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE @@ -164,7 +164,7 @@ decoding_map = { 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp860.py b/Lib/encodings/cp860.py index 3b9a15d..057d918 100644 --- a/Lib/encodings/cp860.py +++ b/Lib/encodings/cp860.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP860.TXT'. - +""" Python Character Mapping Codec generated from 'CP860.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp861.py b/Lib/encodings/cp861.py index 3f07fba..8db3b40 100644 --- a/Lib/encodings/cp861.py +++ b/Lib/encodings/cp861.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP861.TXT'. - +""" Python Character Mapping Codec generated from 'CP861.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). 
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp862.py b/Lib/encodings/cp862.py index 4bc1cbe..1cac3e2 100644 --- a/Lib/encodings/cp862.py +++ b/Lib/encodings/cp862.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP862.TXT'. - +""" Python Character Mapping Codec generated from 'CP862.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x05d0, # HEBREW LETTER ALEF 0x0081: 0x05d1, # HEBREW LETTER BET 0x0082: 0x05d2, # HEBREW LETTER GIMEL @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp863.py b/Lib/encodings/cp863.py index 3e6103f..ecdc391 100644 --- a/Lib/encodings/cp863.py +++ b/Lib/encodings/cp863.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP863.TXT'. - +""" Python Character Mapping Codec generated from 'CP863.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp864.py b/Lib/encodings/cp864.py index 8193278..861fb00 100644 --- a/Lib/encodings/cp864.py +++ b/Lib/encodings/cp864.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP864.TXT'. - +""" Python Character Mapping Codec generated from 'CP864.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0025: 0x066a, # ARABIC PERCENT SIGN 0x0080: 0x00b0, # DEGREE SIGN 0x0081: 0x00b7, # MIDDLE DOT @@ -163,7 +163,7 @@ decoding_map = { 0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: None, # UNDEFINED -} +}) ### Encoding Map diff --git a/Lib/encodings/cp865.py b/Lib/encodings/cp865.py index eaed7a9..4d9010d 100644 --- a/Lib/encodings/cp865.py +++ b/Lib/encodings/cp865.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP865.TXT'. - +""" Python Character Mapping Codec generated from 'CP865.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp866.py b/Lib/encodings/cp866.py index 25e1a50..6a8b0b0 100644 --- a/Lib/encodings/cp866.py +++ b/Lib/encodings/cp866.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP866.TXT'. - +""" Python Character Mapping Codec generated from 'CP866.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x00a4, # CURRENCY SIGN 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp869.py b/Lib/encodings/cp869.py index 840335f..65d2b2e 100644 --- a/Lib/encodings/cp869.py +++ b/Lib/encodings/cp869.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP869.TXT'. - +""" Python Character Mapping Codec generated from 'CP869.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: None, # UNDEFINED 0x0081: None, # UNDEFINED 0x0082: None, # UNDEFINED @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp874.py b/Lib/encodings/cp874.py index 0231c7a..31f4d3d 100644 --- a/Lib/encodings/cp874.py +++ b/Lib/encodings/cp874.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP874.TXT'. - +""" Python Character Mapping Codec generated from 'CP874.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: None, # UNDEFINED 0x0082: None, # UNDEFINED @@ -164,7 +164,7 @@ decoding_map = { 0x00fd: None, # UNDEFINED 0x00fe: None, # UNDEFINED 0x00ff: None, # UNDEFINED -} +}) ### Encoding Map diff --git a/Lib/encodings/cp875.py b/Lib/encodings/cp875.py index 924c0a0..3500446 100644 --- a/Lib/encodings/cp875.py +++ b/Lib/encodings/cp875.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP875.TXT'. - +""" Python Character Mapping Codec generated from 'CP875.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0004: 0x009c, # CONTROL 0x0005: 0x0009, # HORIZONTAL TABULATION 0x0006: 0x0086, # CONTROL @@ -274,7 +274,7 @@ decoding_map = { 0x00fd: 0x001a, # SUBSTITUTE 0x00fe: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00ff: 0x009f, # CONTROL -} +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_1.py b/Lib/encodings/iso8859_1.py index 7355853..f4c0bf7 100644 --- a/Lib/encodings/iso8859_1.py +++ b/Lib/encodings/iso8859_1.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-1.TXT'. - +""" Python Character Mapping Codec generated from '8859-1.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -22,10 +22,7 @@ class Codec(codecs.Codec): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): - - def __init__(self,stream,errors='strict'): - - codecs.StreamWriter.__init__(self,strict,errors) + pass class StreamReader(Codec,codecs.StreamReader): pass @@ -38,9 +35,9 @@ def getregentry(): ### Decoding Map -decoding_map = { - -} +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_10.py b/Lib/encodings/iso8859_10.py index 96b435c..c43c653 100644 --- a/Lib/encodings/iso8859_10.py +++ b/Lib/encodings/iso8859_10.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-10.TXT'. - +""" Python Character Mapping Codec generated from '8859-10.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK 0x00a2: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON 0x00a3: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA @@ -83,7 +83,7 @@ decoding_map = { 0x00f7: 0x0169, # LATIN SMALL LETTER U WITH TILDE 0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK 0x00ff: 0x0138, # LATIN SMALL LETTER KRA -} +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_13.py b/Lib/encodings/iso8859_13.py index d8b2230..2ab5292 100644 --- a/Lib/encodings/iso8859_13.py +++ b/Lib/encodings/iso8859_13.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-13.TXT'. - +""" Python Character Mapping Codec generated from '8859-13.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x00a1: 0x201d, # RIGHT DOUBLE QUOTATION MARK 0x00a5: 0x201e, # DOUBLE LOW-9 QUOTATION MARK 0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE @@ -93,7 +93,7 @@ decoding_map = { 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE 0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON 0x00ff: 0x2019, # RIGHT SINGLE QUOTATION MARK -} +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_14.py b/Lib/encodings/iso8859_14.py index 8ee0aa9..5533e96 100644 --- a/Lib/encodings/iso8859_14.py +++ b/Lib/encodings/iso8859_14.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-14.TXT'. - +""" Python Character Mapping Codec generated from '8859-14.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x00a1: 0x1e02, # LATIN CAPITAL LETTER B WITH DOT ABOVE 0x00a2: 0x1e03, # LATIN SMALL LETTER B WITH DOT ABOVE 0x00a4: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE @@ -68,7 +68,7 @@ decoding_map = { 0x00f0: 0x0175, # LATIN SMALL LETTER W WITH CIRCUMFLEX 0x00f7: 0x1e6b, # LATIN SMALL LETTER T WITH DOT ABOVE 0x00fe: 0x0177, # LATIN SMALL LETTER Y WITH CIRCUMFLEX -} +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_15.py b/Lib/encodings/iso8859_15.py index 862ff28..7bffff4 100644 --- a/Lib/encodings/iso8859_15.py +++ b/Lib/encodings/iso8859_15.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-15.TXT'. - +""" Python Character Mapping Codec generated from '8859-15.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x00a4: 0x20ac, # EURO SIGN 0x00a6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON 0x00a8: 0x0161, # LATIN SMALL LETTER S WITH CARON @@ -45,7 +45,7 @@ decoding_map = { 0x00bc: 0x0152, # LATIN CAPITAL LIGATURE OE 0x00bd: 0x0153, # LATIN SMALL LIGATURE OE 0x00be: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS -} +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_2.py b/Lib/encodings/iso8859_2.py index 034001a..481f9a0 100644 --- a/Lib/encodings/iso8859_2.py +++ b/Lib/encodings/iso8859_2.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-2.TXT'. - +""" Python Character Mapping Codec generated from '8859-2.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK 0x00a2: 0x02d8, # BREVE 0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE @@ -94,7 +94,7 @@ decoding_map = { 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE 0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA 0x00ff: 0x02d9, # DOT ABOVE -} +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_3.py b/Lib/encodings/iso8859_3.py index f262767..c2820ad 100644 --- a/Lib/encodings/iso8859_3.py +++ b/Lib/encodings/iso8859_3.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-3.TXT'. - +""" Python Character Mapping Codec generated from '8859-3.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,15 +35,17 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x00a1: 0x0126, # LATIN CAPITAL LETTER H WITH STROKE 0x00a2: 0x02d8, # BREVE + 0x00a5: None, 0x00a6: 0x0124, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX 0x00a9: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE 0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA 0x00ab: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE 0x00ac: 0x0134, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX + 0x00ae: None, 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE 0x00b1: 0x0127, # LATIN SMALL LETTER H WITH STROKE 0x00b6: 0x0125, # LATIN SMALL LETTER H WITH CIRCUMFLEX @@ -51,21 +53,26 @@ decoding_map = { 0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA 0x00bb: 0x011f, # LATIN SMALL LETTER G WITH BREVE 0x00bc: 0x0135, # LATIN SMALL LETTER J WITH CIRCUMFLEX + 0x00be: None, 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00c3: None, 0x00c5: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE 
0x00c6: 0x0108, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX + 0x00d0: None, 0x00d5: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE 0x00d8: 0x011c, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX 0x00dd: 0x016c, # LATIN CAPITAL LETTER U WITH BREVE 0x00de: 0x015c, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX + 0x00e3: None, 0x00e5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE 0x00e6: 0x0109, # LATIN SMALL LETTER C WITH CIRCUMFLEX + 0x00f0: None, 0x00f5: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE 0x00f8: 0x011d, # LATIN SMALL LETTER G WITH CIRCUMFLEX 0x00fd: 0x016d, # LATIN SMALL LETTER U WITH BREVE 0x00fe: 0x015d, # LATIN SMALL LETTER S WITH CIRCUMFLEX 0x00ff: 0x02d9, # DOT ABOVE -} +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_4.py b/Lib/encodings/iso8859_4.py index 29f9fd3..30d6ca6 100644 --- a/Lib/encodings/iso8859_4.py +++ b/Lib/encodings/iso8859_4.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-4.TXT'. - +""" Python Character Mapping Codec generated from '8859-4.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK 0x00a2: 0x0138, # LATIN SMALL LETTER KRA 0x00a3: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA @@ -87,7 +87,7 @@ decoding_map = { 0x00fd: 0x0169, # LATIN SMALL LETTER U WITH TILDE 0x00fe: 0x016b, # LATIN SMALL LETTER U WITH MACRON 0x00ff: 0x02d9, # DOT ABOVE -} +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_5.py b/Lib/encodings/iso8859_5.py index d71c15f..2bdaa50 100644 --- a/Lib/encodings/iso8859_5.py +++ b/Lib/encodings/iso8859_5.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-5.TXT'. - +""" Python Character Mapping Codec generated from '8859-5.TXT' with gencodec.py. 
Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x00a1: 0x0401, # CYRILLIC CAPITAL LETTER IO 0x00a2: 0x0402, # CYRILLIC CAPITAL LETTER DJE 0x00a3: 0x0403, # CYRILLIC CAPITAL LETTER GJE @@ -131,7 +131,7 @@ decoding_map = { 0x00fd: 0x00a7, # SECTION SIGN 0x00fe: 0x045e, # CYRILLIC SMALL LETTER SHORT U 0x00ff: 0x045f, # CYRILLIC SMALL LETTER DZHE -} +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_6.py b/Lib/encodings/iso8859_6.py index b4d4315..585fa11 100644 --- a/Lib/encodings/iso8859_6.py +++ b/Lib/encodings/iso8859_6.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-6.TXT'. - +""" Python Character Mapping Codec generated from '8859-6.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,11 +35,38 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x00a1: None, + 0x00a2: None, + 0x00a3: None, + 0x00a5: None, + 0x00a6: None, + 0x00a7: None, + 0x00a8: None, + 0x00a9: None, + 0x00aa: None, + 0x00ab: None, 0x00ac: 0x060c, # ARABIC COMMA + 0x00ae: None, + 0x00af: None, + 0x00b0: None, + 0x00b1: None, + 0x00b2: None, + 0x00b3: None, + 0x00b4: None, + 0x00b5: None, + 0x00b6: None, + 0x00b7: None, + 0x00b8: None, + 0x00b9: None, + 0x00ba: None, 0x00bb: 0x061b, # ARABIC SEMICOLON + 0x00bc: None, + 0x00bd: None, + 0x00be: None, 0x00bf: 0x061f, # ARABIC QUESTION MARK + 0x00c0: None, 0x00c1: 0x0621, # ARABIC LETTER HAMZA 0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE 0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE @@ -66,6 +93,11 @@ decoding_map = { 0x00d8: 0x0638, # ARABIC LETTER ZAH 0x00d9: 0x0639, # ARABIC LETTER AIN 0x00da: 0x063a, # ARABIC LETTER GHAIN + 0x00db: None, + 0x00dc: None, + 0x00dd: None, + 0x00de: None, + 0x00df: None, 0x00e0: 0x0640, # ARABIC TATWEEL 0x00e1: 0x0641, # ARABIC LETTER FEH 0x00e2: 0x0642, # ARABIC LETTER QAF @@ -85,7 +117,20 @@ decoding_map = { 0x00f0: 0x0650, # ARABIC KASRA 0x00f1: 0x0651, # ARABIC SHADDA 0x00f2: 0x0652, # ARABIC SUKUN -} + 0x00f3: None, + 0x00f4: None, + 0x00f5: None, + 0x00f6: None, + 0x00f7: None, + 0x00f8: None, + 0x00f9: None, + 0x00fa: None, + 0x00fb: None, + 0x00fc: None, + 0x00fd: None, + 0x00fe: None, + 0x00ff: None, +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_7.py b/Lib/encodings/iso8859_7.py index c847610..48f1bd5 100644 --- a/Lib/encodings/iso8859_7.py +++ b/Lib/encodings/iso8859_7.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-7.TXT'. - +""" Python Character Mapping Codec generated from '8859-7.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. 
+(c) Copyright 2000 Guido van Rossum. """#" @@ -35,10 +35,14 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x00a1: 0x2018, # LEFT SINGLE QUOTATION MARK 0x00a2: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00a4: None, + 0x00a5: None, + 0x00aa: None, + 0x00ae: None, 0x00af: 0x2015, # HORIZONTAL BAR 0x00b4: 0x0384, # GREEK TONOS 0x00b5: 0x0385, # GREEK DIALYTIKA TONOS @@ -67,6 +71,7 @@ decoding_map = { 0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON 0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI 0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO + 0x00d2: None, 0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA 0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU 0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON @@ -111,7 +116,8 @@ decoding_map = { 0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS 0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS 0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS -} + 0x00ff: None, +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_8.py b/Lib/encodings/iso8859_8.py index 72b783b..a19aa67 100644 --- a/Lib/encodings/iso8859_8.py +++ b/Lib/encodings/iso8859_8.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-8.TXT'. - +""" Python Character Mapping Codec generated from '8859-8.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,11 +35,43 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x00a1: None, 0x00aa: 0x00d7, # MULTIPLICATION SIGN - 0x00af: 0x203e, # OVERLINE 0x00ba: 0x00f7, # DIVISION SIGN + 0x00bf: None, + 0x00c0: None, + 0x00c1: None, + 0x00c2: None, + 0x00c3: None, + 0x00c4: None, + 0x00c5: None, + 0x00c6: None, + 0x00c7: None, + 0x00c8: None, + 0x00c9: None, + 0x00ca: None, + 0x00cb: None, + 0x00cc: None, + 0x00cd: None, + 0x00ce: None, + 0x00cf: None, + 0x00d0: None, + 0x00d1: None, + 0x00d2: None, + 0x00d3: None, + 0x00d4: None, + 0x00d5: None, + 0x00d6: None, + 0x00d7: None, + 0x00d8: None, + 0x00d9: None, + 0x00da: None, + 0x00db: None, + 0x00dc: None, + 0x00dd: None, + 0x00de: None, 0x00df: 0x2017, # DOUBLE LOW LINE 0x00e0: 0x05d0, # HEBREW LETTER ALEF 0x00e1: 0x05d1, # HEBREW LETTER BET @@ -68,7 +100,12 @@ decoding_map = { 0x00f8: 0x05e8, # HEBREW LETTER RESH 0x00f9: 0x05e9, # HEBREW LETTER SHIN 0x00fa: 0x05ea, # HEBREW LETTER TAV -} + 0x00fb: None, + 0x00fc: None, + 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK + 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK + 0x00ff: None, +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_9.py b/Lib/encodings/iso8859_9.py index 3f91d32..a278905 100644 --- a/Lib/encodings/iso8859_9.py +++ b/Lib/encodings/iso8859_9.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-9.TXT'. - +""" Python Character Mapping Codec generated from '8859-9.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,15 +35,15 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE 0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE 0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA 0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE 0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I 0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA -} +}) ### Encoding Map diff --git a/Lib/encodings/koi8_r.py b/Lib/encodings/koi8_r.py index 0e1c15b..c28004e 100644 --- a/Lib/encodings/koi8_r.py +++ b/Lib/encodings/koi8_r.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'KOI8-R.TXT'. - +""" Python Character Mapping Codec generated from 'KOI8-R.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL 0x0081: 0x2502, # BOX DRAWINGS LIGHT VERTICAL 0x0082: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA 0x00fe: 0x0427, # CYRILLIC CAPITAL LETTER CHE 0x00ff: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN -} +}) ### Encoding Map diff --git a/Lib/encodings/mac_cyrillic.py b/Lib/encodings/mac_cyrillic.py index 1314836..4552831 100644 --- a/Lib/encodings/mac_cyrillic.py +++ b/Lib/encodings/mac_cyrillic.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CYRILLIC.TXT'. - +""" Python Character Mapping Codec generated from 'CYRILLIC.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE @@ -160,7 +160,7 @@ decoding_map = { 0x00fd: 0x044d, # CYRILLIC SMALL LETTER E 0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU 0x00ff: 0x00a4, # CURRENCY SIGN -} +}) ### Encoding Map diff --git a/Lib/encodings/mac_greek.py b/Lib/encodings/mac_greek.py index 7673b83..b7040c4 100644 --- a/Lib/encodings/mac_greek.py +++ b/Lib/encodings/mac_greek.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'GREEK.TXT'. - +""" Python Character Mapping Codec generated from 'GREEK.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0081: 0x00b9, # SUPERSCRIPT ONE 0x0082: 0x00b2, # SUPERSCRIPT TWO @@ -163,7 +163,7 @@ decoding_map = { 0x00fd: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS 0x00fe: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS 0x00ff: None, # UNDEFINED -} +}) ### Encoding Map diff --git a/Lib/encodings/mac_iceland.py b/Lib/encodings/mac_iceland.py index 62e1f63..f20e134 100644 --- a/Lib/encodings/mac_iceland.py +++ b/Lib/encodings/mac_iceland.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'ICELAND.TXT'. - +""" Python Character Mapping Codec generated from 'ICELAND.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA @@ -159,7 +159,7 @@ decoding_map = { 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT 0x00fe: 0x02db, # OGONEK 0x00ff: 0x02c7, # CARON -} +}) ### Encoding Map diff --git a/Lib/encodings/mac_latin2.py b/Lib/encodings/mac_latin2.py index 7e64959..0fba502 100644 --- a/Lib/encodings/mac_latin2.py +++ b/Lib/encodings/mac_latin2.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'LATIN2.TXT'. - +""" Python Character Mapping Codec generated from 'LATIN2.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0081: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON 0x0082: 0x0101, # LATIN SMALL LETTER A WITH MACRON @@ -163,7 +163,7 @@ decoding_map = { 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE 0x00fe: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA 0x00ff: 0x02c7, # CARON -} +}) ### Encoding Map diff --git a/Lib/encodings/mac_roman.py b/Lib/encodings/mac_roman.py index 9147e93..6d048a3 100644 --- a/Lib/encodings/mac_roman.py +++ b/Lib/encodings/mac_roman.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'ROMAN.TXT'. - +""" Python Character Mapping Codec generated from 'ROMAN.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA @@ -160,7 +160,7 @@ decoding_map = { 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT 0x00fe: 0x02db, # OGONEK 0x00ff: 0x02c7, # CARON -} +}) ### Encoding Map diff --git a/Lib/encodings/mac_turkish.py b/Lib/encodings/mac_turkish.py index 76b6bbe..c81a864 100644 --- a/Lib/encodings/mac_turkish.py +++ b/Lib/encodings/mac_turkish.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'TURKISH.TXT'. - +""" Python Character Mapping Codec generated from 'TURKISH.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA @@ -160,7 +160,7 @@ decoding_map = { 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT 0x00fe: 0x02db, # OGONEK 0x00ff: 0x02c7, # CARON -} +}) ### Encoding Map diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 05aecd8..579bab1 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -494,14 +494,15 @@ for encoding in ( 'cp852', 'cp855', 'cp860', 'cp861', 'cp862', 'cp863', 'cp865', 'cp866', 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15', - 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6', - 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1', + 'iso8859_2', 'iso8859_4', 'iso8859_5', + 'iso8859_9', 'koi8_r', 'latin_1', 'mac_cyrillic', 'mac_latin2', ### These have undefined 
mappings: #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', #'cp1256', 'cp1257', 'cp1258', #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874', + #'iso8859_3', 'iso8859_6', 'iso8859_7', #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish', ### These fail the round-trip: diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index fe591b5..b9e457d 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1970,11 +1970,11 @@ PyObject *PyUnicode_DecodeCharmap(const char *s, Py_DECREF(w); if (x == NULL) { if (PyErr_ExceptionMatches(PyExc_LookupError)) { - /* No mapping found: default to Latin-1 mapping */ + /* No mapping found means: mapping is undefined. */ PyErr_Clear(); - *p++ = (Py_UNICODE)ch; - continue; - } + x = Py_None; + Py_INCREF(x); + } else goto onError; } @@ -2086,16 +2086,11 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p, Py_DECREF(w); if (x == NULL) { if (PyErr_ExceptionMatches(PyExc_LookupError)) { - /* No mapping found: default to Latin-1 mapping if possible */ + /* No mapping found means: mapping is undefined. */ PyErr_Clear(); - if (ch < 256) { - *s++ = (char)ch; - continue; - } - else if (!charmap_encoding_error(&p, &s, errors, - "missing character mapping")) - continue; - } + x = Py_None; + Py_INCREF(x); + } else goto onError; } diff --git a/Tools/scripts/gencodec.py b/Tools/scripts/gencodec.py index 45b69b0..39b42ff 100644 --- a/Tools/scripts/gencodec.py +++ b/Tools/scripts/gencodec.py @@ -1,9 +1,9 @@ """ Unicode Mapping Parser and Codec Generator. This script parses Unicode mapping files as available from the Unicode -site (ftp.unicode.org) and creates Python codec modules from them. The -codecs use the standard character mapping codec to actually apply the -mapping. +site (ftp://ftp.unicode.org/Public/MAPPINGS/) and creates Python codec +modules from them. The codecs use the standard character mapping codec +to actually apply the mapping. 
Synopsis: gencodec.py dir codec_prefix @@ -18,6 +18,7 @@ same location (with .mapping extension). Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright Guido van Rossum, 2000. """#" @@ -70,6 +71,10 @@ def readmap(filename, lines = f.readlines() f.close() enc2uni = {} + identity = [] + unmapped = range(256) + for i in range(256): + unmapped[i] = i for line in lines: line = strip(line) if not line or line[0] == '#': @@ -85,8 +90,22 @@ def readmap(filename, comment = '' else: comment = comment[1:] - if enc != uni: + if enc < 256: + unmapped.remove(enc) + if enc == uni: + identity.append(enc) + else: + enc2uni[enc] = (uni,comment) + else: enc2uni[enc] = (uni,comment) + # If there are more identity-mapped entries than unmapped entries, + # it pays to generate an identity dictionary first, and add explicit + # mappings to None for the rest + if len(identity)>=len(unmapped): + for enc in unmapped: + enc2uni[enc] = (None, "") + enc2uni['IDENTITY'] = 256 + return enc2uni def hexrepr(t, @@ -143,11 +162,12 @@ def codegen(name,map,comments=1): """ l = [ '''\ -""" Python Character Mapping Codec generated from '%s'. +""" Python Character Mapping Codec generated from '%s' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -178,15 +198,23 @@ def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map - -decoding_map = { ''' % name, ] + + if map.has_key("IDENTITY"): + l.append("decoding_map = codecs.make_identity_dict(range(%d))" + % map["IDENTITY"]) + l.append("decoding_map.update({") + splits = 1 + del map["IDENTITY"] + else: + l.append("decoding_map = {") + splits = 0 + mappings = map.items() mappings.sort() append = l.append i = 0 - splits = 0 for e,value in mappings: try: (u,c) = value @@ -198,7 +226,7 @@ decoding_map = { append('\t%s: %s,\t# %s' % (key,unicoderepr(u),c)) else: append('\t%s: %s,' % (key,unicoderepr(u))) - i = i + 1 + i += 1 if i == 4096: # Split the definition into parts to that the Python # parser doesn't dump core @@ -206,7 +234,7 @@ decoding_map = { append('}') else: append('})') - append('map.update({') + append('decoding_map.update({') i = 0 splits = splits + 1 if splits == 0: @@ -265,7 +293,7 @@ def rewritepythondir(dir,prefix='',comments=1): mapnames = os.listdir(dir) for mapname in mapnames: - if mapname[-len('.mapping'):] != '.mapping': + if not mapname.endswith('.mapping'): continue codefile = mapname[:-len('.mapping')] + '.py' print 'converting %s to %s' % (mapname, |