summaryrefslogtreecommitdiffstats
path: root/Lib/encodings/cp1256.py
blob: f3e694c92071b54138888223bb62295e6556ab69 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
""" Python Character Mapping Codec generated from 'CP1256.TXT' with gencodec.py.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.

"""#"

import codecs

### Codec APIs

class Codec(codecs.Codec):
    """Stateless cp1256 codec driven by the module-level charmap tables."""

    def encode(self, input, errors='strict'):
        """Encode *input* by looking each character up in ``encoding_map``.

        Delegates to ``codecs.charmap_encode`` and returns its result
        unchanged; *errors* selects the error handling scheme.
        """
        return codecs.charmap_encode(input, errors, encoding_map)

    def decode(self, input, errors='strict'):
        """Decode *input* by looking each byte up in ``decoding_map``.

        Delegates to ``codecs.charmap_decode`` and returns its result
        unchanged; *errors* selects the error handling scheme.
        """
        return codecs.charmap_decode(input, errors, decoding_map)

class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer whose encode() comes from this module's Codec."""

class StreamReader(Codec, codecs.StreamReader):
    """Stream reader whose decode() comes from this module's Codec."""

### encodings module API

def getregentry():
    """Return this codec's entry as a 4-tuple.

    The tuple is ``(encoder, decoder, stream_reader, stream_writer)``:
    the bound ``encode`` and ``decode`` methods of two separate ``Codec``
    instances, followed by the ``StreamReader`` and ``StreamWriter``
    classes themselves (not instances).
    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)

### Decoding Map

# Byte value -> Unicode code point.  Start from the identity mapping for
# all 256 byte values, then override the positions where this code page
# differs from it.  Bytes not listed below keep their identity mapping.
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
    # 0x80-0x9f: punctuation, symbols and additional Arabic letters
    0x80: 0x20ac,  # EURO SIGN
    0x81: 0x067e,  # ARABIC LETTER PEH
    0x82: 0x201a,  # SINGLE LOW-9 QUOTATION MARK
    0x83: 0x0192,  # LATIN SMALL LETTER F WITH HOOK
    0x84: 0x201e,  # DOUBLE LOW-9 QUOTATION MARK
    0x85: 0x2026,  # HORIZONTAL ELLIPSIS
    0x86: 0x2020,  # DAGGER
    0x87: 0x2021,  # DOUBLE DAGGER
    0x88: 0x02c6,  # MODIFIER LETTER CIRCUMFLEX ACCENT
    0x89: 0x2030,  # PER MILLE SIGN
    0x8a: 0x0679,  # ARABIC LETTER TTEH
    0x8b: 0x2039,  # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    0x8c: 0x0152,  # LATIN CAPITAL LIGATURE OE
    0x8d: 0x0686,  # ARABIC LETTER TCHEH
    0x8e: 0x0698,  # ARABIC LETTER JEH
    0x8f: 0x0688,  # ARABIC LETTER DDAL
    0x90: 0x06af,  # ARABIC LETTER GAF
    0x91: 0x2018,  # LEFT SINGLE QUOTATION MARK
    0x92: 0x2019,  # RIGHT SINGLE QUOTATION MARK
    0x93: 0x201c,  # LEFT DOUBLE QUOTATION MARK
    0x94: 0x201d,  # RIGHT DOUBLE QUOTATION MARK
    0x95: 0x2022,  # BULLET
    0x96: 0x2013,  # EN DASH
    0x97: 0x2014,  # EM DASH
    0x98: 0x06a9,  # ARABIC LETTER KEHEH
    0x99: 0x2122,  # TRADE MARK SIGN
    0x9a: 0x0691,  # ARABIC LETTER RREH
    0x9b: 0x203a,  # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    0x9c: 0x0153,  # LATIN SMALL LIGATURE OE
    0x9d: 0x200c,  # ZERO WIDTH NON-JOINER
    0x9e: 0x200d,  # ZERO WIDTH JOINER
    0x9f: 0x06ba,  # ARABIC LETTER NOON GHUNNA

    # 0xa0-0xbf: only a few positions are overridden
    0xa1: 0x060c,  # ARABIC COMMA
    0xaa: 0x06be,  # ARABIC LETTER HEH DOACHASHMEE
    0xba: 0x061b,  # ARABIC SEMICOLON
    0xbf: 0x061f,  # ARABIC QUESTION MARK

    # 0xc0-0xed: Arabic letters (gaps keep the identity mapping)
    0xc0: 0x06c1,  # ARABIC LETTER HEH GOAL
    0xc1: 0x0621,  # ARABIC LETTER HAMZA
    0xc2: 0x0622,  # ARABIC LETTER ALEF WITH MADDA ABOVE
    0xc3: 0x0623,  # ARABIC LETTER ALEF WITH HAMZA ABOVE
    0xc4: 0x0624,  # ARABIC LETTER WAW WITH HAMZA ABOVE
    0xc5: 0x0625,  # ARABIC LETTER ALEF WITH HAMZA BELOW
    0xc6: 0x0626,  # ARABIC LETTER YEH WITH HAMZA ABOVE
    0xc7: 0x0627,  # ARABIC LETTER ALEF
    0xc8: 0x0628,  # ARABIC LETTER BEH
    0xc9: 0x0629,  # ARABIC LETTER TEH MARBUTA
    0xca: 0x062a,  # ARABIC LETTER TEH
    0xcb: 0x062b,  # ARABIC LETTER THEH
    0xcc: 0x062c,  # ARABIC LETTER JEEM
    0xcd: 0x062d,  # ARABIC LETTER HAH
    0xce: 0x062e,  # ARABIC LETTER KHAH
    0xcf: 0x062f,  # ARABIC LETTER DAL
    0xd0: 0x0630,  # ARABIC LETTER THAL
    0xd1: 0x0631,  # ARABIC LETTER REH
    0xd2: 0x0632,  # ARABIC LETTER ZAIN
    0xd3: 0x0633,  # ARABIC LETTER SEEN
    0xd4: 0x0634,  # ARABIC LETTER SHEEN
    0xd5: 0x0635,  # ARABIC LETTER SAD
    0xd6: 0x0636,  # ARABIC LETTER DAD
    0xd8: 0x0637,  # ARABIC LETTER TAH
    0xd9: 0x0638,  # ARABIC LETTER ZAH
    0xda: 0x0639,  # ARABIC LETTER AIN
    0xdb: 0x063a,  # ARABIC LETTER GHAIN
    0xdc: 0x0640,  # ARABIC TATWEEL
    0xdd: 0x0641,  # ARABIC LETTER FEH
    0xde: 0x0642,  # ARABIC LETTER QAF
    0xdf: 0x0643,  # ARABIC LETTER KAF
    0xe1: 0x0644,  # ARABIC LETTER LAM
    0xe3: 0x0645,  # ARABIC LETTER MEEM
    0xe4: 0x0646,  # ARABIC LETTER NOON
    0xe5: 0x0647,  # ARABIC LETTER HEH
    0xe6: 0x0648,  # ARABIC LETTER WAW
    0xec: 0x0649,  # ARABIC LETTER ALEF MAKSURA
    0xed: 0x064a,  # ARABIC LETTER YEH

    # 0xf0-0xfa: Arabic diacritical marks
    0xf0: 0x064b,  # ARABIC FATHATAN
    0xf1: 0x064c,  # ARABIC DAMMATAN
    0xf2: 0x064d,  # ARABIC KASRATAN
    0xf3: 0x064e,  # ARABIC FATHA
    0xf5: 0x064f,  # ARABIC DAMMA
    0xf6: 0x0650,  # ARABIC KASRA
    0xf8: 0x0651,  # ARABIC SHADDA
    0xfa: 0x0652,  # ARABIC SUKUN

    # 0xfd-0xff: directional marks and one more letter
    0xfd: 0x200e,  # LEFT-TO-RIGHT MARK
    0xfe: 0x200f,  # RIGHT-TO-LEFT MARK
    0xff: 0x06d2,  # ARABIC LETTER YEH BARREE
})

### Encoding Map

# Code point -> byte value, derived from decoding_map so the two tables
# cannot drift apart.
encoding_map = codecs.make_encoding_map(decoding_map)