summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarc-André Lemburg <mal@egenix.com>2001-01-06 15:09:57 (GMT)
committerMarc-André Lemburg <mal@egenix.com>2001-01-06 15:09:57 (GMT)
commit1109db443efdc2451a01c3734c08fc224e9afa7e (patch)
treed7991f633ef0c347cc574ca9c3239645198d544a
parentec233e58038b222ec4cedc07ec46bed1f40468d7 (diff)
downloadcpython-1109db443efdc2451a01c3734c08fc224e9afa7e.zip
cpython-1109db443efdc2451a01c3734c08fc224e9afa7e.tar.gz
cpython-1109db443efdc2451a01c3734c08fc224e9afa7e.tar.bz2
This patch adds a new feature to the builtin charmap codec:
the mapping dictionaries can now contain 1-n mappings, meaning that character ordinals may be mapped to strings or Unicode objects, e.g. 0x0078 ('x') -> u"abc", causing the ordinal to be replaced by the complete string or Unicode object instead of just one character. Another feature introduced by the patch is that of mapping ordinals to the empty string. This allows removing characters. The patch is different from patch #103100 in that it does not cause a performance hit for the normal use case of 1-1 mappings. Written by Marc-Andre Lemburg, copyright assigned to Guido van Rossum.
-rw-r--r--Lib/test/output/test_charmapcodec1
-rw-r--r--Lib/test/test_charmapcodec.py65
2 files changed, 66 insertions, 0 deletions
diff --git a/Lib/test/output/test_charmapcodec b/Lib/test/output/test_charmapcodec
new file mode 100644
index 0000000..9b15894
--- /dev/null
+++ b/Lib/test/output/test_charmapcodec
@@ -0,0 +1 @@
+test_charmapcodec
diff --git a/Lib/test/test_charmapcodec.py b/Lib/test/test_charmapcodec.py
new file mode 100644
index 0000000..21251fb
--- /dev/null
+++ b/Lib/test/test_charmapcodec.py
@@ -0,0 +1,65 @@
+""" Python Character Mapping Codec test
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+(c) Copyright 2000 Guido van Rossum.
+
+"""#"
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+ # Charmap codec driven by the module-level encoding_map / decoding_map tables.
+ def encode(self,input,errors='strict'):
+ # Delegate to the builtin charmap encoder, passing encoding_map as the table.
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+ # Delegate to the builtin charmap decoder, passing decoding_map as the table.
+ return codecs.charmap_decode(input,errors,decoding_map)
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass  # no overrides: everything is inherited from Codec and codecs.StreamWriter
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass  # no overrides: everything is inherited from Codec and codecs.StreamReader
+
+### encodings module API
+
+def getregentry():
+ # Return the 4-tuple (encode, decode, StreamReader, StreamWriter); encode and decode are bound methods of two separate Codec() instances. Presumably consumed by the encodings package lookup -- confirm.
+ return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))  # start from an identity mapping for ordinals 0-255
+decoding_map.update({
+ 0x0078: u"abc",  # 1-n mapping: ordinal 0x78 ('x') decodes to the three-character string u"abc"
+ "abc": 0x0078,  # reverse entry; when the map is inverted it yields encoding_map[0x0078] = "abc"
+})
+
+### Encoding Map
+
+encoding_map = {}
+for k,v in decoding_map.items():
+ encoding_map[v] = k  # invert decoding_map: every value becomes a key that maps back to its original key
+
+
+### Tests
+
+def check(a, b):
+ if a != b:  # a mismatch is only reported on stdout; nothing is raised, so later checks still run
+ print '*** check failed: %s != %s' % (repr(a), repr(b))
+
+check(unicode('abc', 'mycp'), u'abc')  # NOTE(review): no registration of a 'mycp' codec is visible in this file -- confirm how this lookup resolves
+check(unicode('xdef', 'mycp'), u'abcdef')  # decoding: each 'x' (0x78) is expected to expand to u'abc'
+check(unicode('defx', 'mycp'), u'defabc')
+check(unicode('dxf', 'mycp'), u'dabcf')
+check(unicode('dxfx', 'mycp'), u'dabcfabc')
+
+check(u'abc'.encode('mycp'), 'abc')
+check(u'xdef'.encode('mycp'), 'abcdef')  # encoding: each u'x' (0x78) is expected to expand to 'abc'
+check(u'defx'.encode('mycp'), 'defabc')
+check(u'dxf'.encode('mycp'), 'dabcf')
+check(u'dxfx'.encode('mycp'), 'dabcfabc')