From 94c2c75b5eda49ccbb01778f9ab188fc1dbc1ca2 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Tue, 23 Oct 2007 06:52:59 +0000 Subject: Patch #1071: Improve unicode.translate() so that you can pass unicode characters as mapping keys and invalid mapping keys are recognized and raise an error. --- Doc/library/stdtypes.rst | 2 +- Lib/test/test_unicode.py | 4 +++- Objects/unicodeobject.c | 52 ++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 52 insertions(+), 6 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 61b589f..06eb1cf 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -931,7 +931,7 @@ functions based on regular expressions. Return a copy of the *s* where all characters have been mapped through the *map* which must be a dictionary of characters (strings of length 1) or Unicode ordinals (integers) to Unicode ordinals, strings or ``None``. - Unmapped characters are left untouched. Characters mapped to ``None`` are + Unmapped characters are left untouched. Characters mapped to ``None`` are deleted. .. note:: diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 04dddaa..9aad59a 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -160,12 +160,14 @@ class UnicodeTest( self.checkequalnofix('bbbc', 'abababc', 'translate', {ord('a'):None}) self.checkequalnofix('iiic', 'abababc', 'translate', {ord('a'):None, ord('b'):ord('i')}) self.checkequalnofix('iiix', 'abababc', 'translate', {ord('a'):None, ord('b'):ord('i'), ord('c'):'x'}) - self.checkequalnofix('c', 'abababc', 'translate', {ord('a'):None, ord('b'):''}) + self.checkequalnofix('c', 'abababc', 'translate', {'a':None, 'b':''}) self.checkequalnofix('c', 'abababc', 'translate', {ord('a'):None, ord('b'):''}) self.checkequalnofix('xyyx', 'xzx', 'translate', {ord('z'):'yy'}) self.assertRaises(TypeError, 'hello'.translate) self.assertRaises(TypeError, 'abababc'.translate, 'abc', 'xyz') + self.assertRaises(ValueError, 'abababc'.translate, {'xy':2}) + self.assertRaises(TypeError, 'abababc'.translate, {(1,):2}) def test_split(self): string_tests.CommonTest.test_split(self) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 13644b0..61a2320 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -7810,10 +7810,54 @@ are deleted."); static PyObject* unicode_translate(PyUnicodeObject *self, PyObject *table) { - return PyUnicode_TranslateCharmap(self->str, - self->length, - table, - "ignore"); + PyObject *newtable = NULL; + Py_ssize_t i = 0; + PyObject *key, *value, *result; + + if (!PyDict_Check(table)) { + PyErr_SetString(PyExc_TypeError, "translate argument must be a dict"); + return NULL; + } + /* fixup the table -- allow size-1 string keys instead of only int keys */ + newtable = PyDict_Copy(table); + if (!newtable) return NULL; + while (PyDict_Next(table, &i, &key, &value)) { + if (PyUnicode_Check(key)) { + /* convert string keys to integer keys */ + PyObject *newkey; + int res; + if (PyUnicode_GET_SIZE(key) != 1) { + PyErr_SetString(PyExc_ValueError, "string items in translate " + "table must be 1 element long"); + goto err; + } + newkey = PyInt_FromLong(PyUnicode_AS_UNICODE(key)[0]); + if (!newkey) + goto err; + res = PyDict_SetItem(newtable, newkey, value); + Py_DECREF(newkey); + if (res < 0) + goto err; + } else if (PyInt_Check(key)) { + /* just keep integer keys */ + if (PyDict_SetItem(newtable, key, value) < 0) + goto err; + } else { + PyErr_SetString(PyExc_TypeError, "items in translate table must be " + "strings or integers"); + goto err; + } + } + + result = PyUnicode_TranslateCharmap(self->str, + self->length, + newtable, + "ignore"); + Py_DECREF(newtable); + return result; + err: + Py_DECREF(newtable); + return NULL; } PyDoc_STRVAR(upper__doc__, -- cgit v0.12