This patch adds a new feature to the builtin charmap codec:

The mapping dictionaries can now contain 1-n mappings, meaning that character ordinals may be mapped to strings or Unicode objects, e.g. 0x0078 ('x') -> u"abc", causing the ordinal to be replaced by the complete string or Unicode object instead of just one character. Another feature introduced by the patch is that of mapping ordinals to the empty string. This allows removing characters. The patch is different from patch #103100 in that it does not cause a performance hit for the normal use case of 1-1 mappings. Written by Marc-Andre Lemburg, copyright assigned to Guido van Rossum.
author: Marc-André Lemburg <mal@egenix.com> 2001-01-06 14:59:58 (GMT)
committer: Marc-André Lemburg <mal@egenix.com> 2001-01-06 14:59:58 (GMT)
commit: ec233e58038b222ec4cedc07ec46bed1f40468d7 (patch)
tree: 0aab9637ac294ad74eb9f9aada4633de3096dcef /Objects/unicodeobject.c
parent: 2b287763695c11594bb26e8d49d030855e4663fe (diff)
download: cpython-ec233e58038b222ec4cedc07ec46bed1f40468d7.zip
cpython-ec233e58038b222ec4cedc07ec46bed1f40468d7.tar.gz
cpython-ec233e58038b222ec4cedc07ec46bed1f40468d7.tar.bz2
1 files changed, 48 insertions, 8 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index b9e457d..8f7b354 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1947,6 +1947,7 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
 {
     PyUnicodeObject *v;
     Py_UNICODE *p;
+    int extrachars = 0;
     
     /* Default to Latin-1 */
     if (mapping == NULL)
@@ -1998,14 +1999,33 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
 	    }
 	}
 	else if (PyUnicode_Check(x)) {
-	    if (PyUnicode_GET_SIZE(x) != 1) {
+	    int targetsize = PyUnicode_GET_SIZE(x);
+
+	    if (targetsize == 1)
+		/* 1-1 mapping */
+		*p++ = *PyUnicode_AS_UNICODE(x);
+
+	    else if (targetsize > 1) {
 		/* 1-n mapping */
-		PyErr_SetString(PyExc_NotImplementedError,
-				"1-n mappings are currently not implemented");
+		if (targetsize > extrachars) {
+		    /* resize first */
+		    int oldpos = (int)(p - PyUnicode_AS_UNICODE(v));
+		    int needed = (targetsize - extrachars) + \
+			         (targetsize << 2);
+		    extrachars += needed;
+		    if (_PyUnicode_Resize(v, PyUnicode_GET_SIZE(v) + needed)) {
 		Py_DECREF(x);
 		goto onError;
 	    }
-	    *p++ = *PyUnicode_AS_UNICODE(x);
+		    p = PyUnicode_AS_UNICODE(v) + oldpos;
+		}
+		Py_UNICODE_COPY(p,
+				PyUnicode_AS_UNICODE(x),
+				targetsize);
+		p += targetsize;
+		extrachars -= targetsize;
+	    }
+	    /* 1-0 mapping: skip the character */
 	}
 	else {
 	    /* wrong return value */
@@ -2063,6 +2083,7 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
 {
     PyObject *v;
     char *s;
+    int extrachars = 0;
 
     /* Default to Latin-1 */
     if (mapping == NULL)
@@ -2114,14 +2135,33 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
 	    }
 	}
 	else if (PyString_Check(x)) {
-	    if (PyString_GET_SIZE(x) != 1) {
+	    int targetsize = PyString_GET_SIZE(x);
+
+	    if (targetsize == 1)
+		/* 1-1 mapping */
+		*s++ = *PyString_AS_STRING(x);
+
+	    else if (targetsize > 1) {
 		/* 1-n mapping */
-		PyErr_SetString(PyExc_NotImplementedError,
-		      "1-n mappings are currently not implemented");
+		if (targetsize > extrachars) {
+		    /* resize first */
+		    int oldpos = (int)(s - PyString_AS_STRING(v));
+		    int needed = (targetsize - extrachars) + \
+			         (targetsize << 2);
+		    extrachars += needed;
+		    if (_PyString_Resize(&v, PyString_GET_SIZE(v) + needed)) {
 		Py_DECREF(x);
 		goto onError;
 	    }
-	    *s++ = *PyString_AS_STRING(x);
+		    s = PyString_AS_STRING(v) + oldpos;
+		}
+		memcpy(s,
+		       PyString_AS_STRING(x),
+		       targetsize);
+		s += targetsize;
+		extrachars -= targetsize;
+	    }
+	    /* 1-0 mapping: skip the character */
 	}
 	else {
 	    /* wrong return value */
author	Marc-André Lemburg <mal@egenix.com>	2001-01-06 14:59:58 (GMT)
committer	Marc-André Lemburg <mal@egenix.com>	2001-01-06 14:59:58 (GMT)
commit	ec233e58038b222ec4cedc07ec46bed1f40468d7 (patch)
tree	0aab9637ac294ad74eb9f9aada4633de3096dcef /Objects/unicodeobject.c
parent	2b287763695c11594bb26e8d49d030855e4663fe (diff)
download	cpython-ec233e58038b222ec4cedc07ec46bed1f40468d7.zip cpython-ec233e58038b222ec4cedc07ec46bed1f40468d7.tar.gz cpython-ec233e58038b222ec4cedc07ec46bed1f40468d7.tar.bz2