summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/library/stdtypes.rst29
-rw-r--r--Lib/test/test_unicode.py37
-rw-r--r--Objects/unicodeobject.c150
3 files changed, 155 insertions, 61 deletions
diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst
index 5c69ed6..4f09205 100644
--- a/Doc/library/stdtypes.rst
+++ b/Doc/library/stdtypes.rst
@@ -800,6 +800,21 @@ functions based on regular expressions.
'example.com'
+.. method:: str.maketrans(x[, y[, z]])
+
+ This static method returns a translation table usable for :meth:`str.translate`.
+
+ If there is only one argument, it must be a dictionary mapping Unicode
+ ordinals (integers) or characters (strings of length 1) to Unicode ordinals,
+ strings (of arbitrary lengths) or ``None``. Character keys will then be
+ converted to ordinals.
+
+ If there are two arguments, they must be strings of equal length, and in the
+ resulting dictionary, each character in *x* will be mapped to the character
+ at the same position in *y*. If there is a third argument, it must be a
+ string, whose characters will be mapped to ``None`` in the result.
+
+
.. method:: str.partition(sep)
Split the string at the first occurrence of *sep*, and return a 3-tuple
@@ -934,15 +949,17 @@ functions based on regular expressions.
.. method:: str.translate(map)
Return a copy of the *s* where all characters have been mapped through the
- *map* which must be a dictionary of characters (strings of length 1) or
- Unicode ordinals (integers) to Unicode ordinals, strings or ``None``.
- Unmapped characters are left untouched. Characters mapped to ``None`` are
- deleted.
+ *map* which must be a dictionary of Unicode ordinals (integers) to Unicode
+ ordinals, strings or ``None``. Unmapped characters are left untouched.
+ Characters mapped to ``None`` are deleted.
+
+ A *map* for :meth:`translate` is usually best created by
+ :meth:`str.maketrans`.
.. note::
- A more flexible approach is to create a custom character mapping codec
- using the :mod:`codecs` module (see :mod:`encodings.cp1251` for an
+ An even more flexible approach is to create a custom character mapping
+ codec using the :mod:`codecs` module (see :mod:`encodings.cp1251` for an
example).
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 7475535..fe4eb85 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -166,18 +166,37 @@ class UnicodeTest(
self.assertRaises(ValueError, 'abcdefghi'.rindex, 'ghi', 0, 8)
self.assertRaises(ValueError, 'abcdefghi'.rindex, 'ghi', 0, -1)
- def test_translate(self):
- self.checkequalnofix('bbbc', 'abababc', 'translate', {ord('a'):None})
- self.checkequalnofix('iiic', 'abababc', 'translate', {ord('a'):None, ord('b'):ord('i')})
- self.checkequalnofix('iiix', 'abababc', 'translate', {ord('a'):None, ord('b'):ord('i'), ord('c'):'x'})
- self.checkequalnofix('<i><i><i>c', 'abababc', 'translate', {'a':None, 'b':'<i>'})
- self.checkequalnofix('c', 'abababc', 'translate', {ord('a'):None, ord('b'):''})
- self.checkequalnofix('xyyx', 'xzx', 'translate', {ord('z'):'yy'})
+ def test_maketrans_translate(self):
+ # Tables keyed by integer ordinals work with plain translate();
+ # values may be None (delete), an ordinal, or a string of any length.
+ self.checkequalnofix('bbbc', 'abababc', 'translate',
+ {ord('a'): None})
+ self.checkequalnofix('iiic', 'abababc', 'translate',
+ {ord('a'): None, ord('b'): ord('i')})
+ self.checkequalnofix('iiix', 'abababc', 'translate',
+ {ord('a'): None, ord('b'): ord('i'), ord('c'): 'x'})
+ self.checkequalnofix('c', 'abababc', 'translate',
+ {ord('a'): None, ord('b'): ''})
+ self.checkequalnofix('xyyx', 'xzx', 'translate',
+ {ord('z'): 'yy'})
+ # A table with string keys is expected to leave the text unchanged
+ # when passed to translate() directly; it needs maketrans() to
+ # convert the keys to ordinals first.
+ self.checkequalnofix('abababc', 'abababc', 'translate',
+ {'b': '<i>'})
+ tbl = self.type2test.maketrans({'a': None, 'b': '<i>'})
+ self.checkequalnofix('<i><i><i>c', 'abababc', 'translate', tbl)
+ # Alternative call form: maketrans(x, y, z) maps x[i] -> y[i] and
+ # deletes every character in z.
+ tbl = self.type2test.maketrans('abc', 'xyz', 'd')
+ self.checkequalnofix('xyzzy', 'abdcdcbdddd', 'translate', tbl)
+
+ # Argument validation: missing arguments, x and y of unequal length,
+ # non-string x/y/z, and dict keys that are neither single characters
+ # nor integers.
+ self.assertRaises(TypeError, self.type2test.maketrans)
+ self.assertRaises(ValueError, self.type2test.maketrans, 'abc', 'defg')
+ self.assertRaises(TypeError, self.type2test.maketrans, 2, 'def')
+ self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 2)
+ self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 'def', 2)
+ self.assertRaises(ValueError, self.type2test.maketrans, {'xy': 2})
+ self.assertRaises(TypeError, self.type2test.maketrans, {(1,): 2})
self.assertRaises(TypeError, 'hello'.translate)
self.assertRaises(TypeError, 'abababc'.translate, 'abc', 'xyz')
- self.assertRaises(ValueError, 'abababc'.translate, {'xy':2})
- self.assertRaises(TypeError, 'abababc'.translate, {(1,):2})
def test_split(self):
string_tests.CommonTest.test_split(self)
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 02b0c7a..205576f 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -7793,68 +7793,124 @@ unicode_swapcase(PyUnicodeObject *self)
return fixup(self, fixswapcase);
}
-PyDoc_STRVAR(translate__doc__,
-"S.translate(table) -> unicode\n\
+PyDoc_STRVAR(maketrans__doc__,
+"str.maketrans(x[, y[, z]]) -> dict (static method)\n\
\n\
-Return a copy of the string S, where all characters have been mapped\n\
-through the given translation table, which must be a mapping of\n\
-Unicode ordinals to Unicode ordinals, Unicode strings or None.\n\
-Unmapped characters are left untouched. Characters mapped to None\n\
-are deleted.");
+Return a translation table usable for str.translate().\n\
+If there is only one argument, it must be a dictionary mapping Unicode\n\
+ordinals (integers) or characters to Unicode ordinals, strings or None.\n\
+Character keys will then be converted to ordinals.\n\
+If there are two arguments, they must be strings of equal length, and\n\
+in the resulting dictionary, each character in x will be mapped to the\n\
+character at the same position in y. If there is a third argument, it\n\
+must be a string, whose characters will be mapped to None in the result.");
static PyObject*
-unicode_translate(PyUnicodeObject *self, PyObject *table)
+unicode_maketrans(PyUnicodeObject *null, PyObject *args)
{
- PyObject *newtable = NULL;
+ PyObject *x, *y = NULL, *z = NULL;
+ PyObject *new = NULL, *key, *value;
Py_ssize_t i = 0;
- PyObject *key, *value, *result;
-
- if (!PyDict_Check(table)) {
- PyErr_SetString(PyExc_TypeError, "translate argument must be a dict");
+ int res;
+
+ if (!PyArg_ParseTuple(args, "O|UU:maketrans", &x, &y, &z))
return NULL;
- }
- /* fixup the table -- allow size-1 string keys instead of only int keys */
- newtable = PyDict_Copy(table);
- if (!newtable) return NULL;
- while (PyDict_Next(table, &i, &key, &value)) {
- if (PyUnicode_Check(key)) {
- /* convert string keys to integer keys */
- PyObject *newkey;
- int res;
- if (PyUnicode_GET_SIZE(key) != 1) {
- PyErr_SetString(PyExc_ValueError, "string items in translate "
- "table must be 1 element long");
- goto err;
- }
- newkey = PyInt_FromLong(PyUnicode_AS_UNICODE(key)[0]);
- if (!newkey)
+ new = PyDict_New();
+ if (!new)
+ return NULL;
+ if (y != NULL) {
+ /* x must be a string too, of equal length */
+ Py_ssize_t ylen = PyUnicode_GET_SIZE(y);
+ if (!PyUnicode_Check(x)) {
+ PyErr_SetString(PyExc_TypeError, "first maketrans argument must "
+ "be a string if there is a second argument");
+ goto err;
+ }
+ if (PyUnicode_GET_SIZE(x) != ylen) {
+ PyErr_SetString(PyExc_ValueError, "the first two maketrans "
+ "arguments must have equal length");
+ goto err;
+ }
+ /* create entries for translating chars in x to those in y */
+ for (i = 0; i < PyUnicode_GET_SIZE(x); i++) {
+ key = PyInt_FromLong(PyUnicode_AS_UNICODE(x)[i]);
+ value = PyInt_FromLong(PyUnicode_AS_UNICODE(y)[i]);
+ if (!key || !value)
goto err;
- res = PyDict_SetItem(newtable, newkey, value);
- Py_DECREF(newkey);
+ res = PyDict_SetItem(new, key, value);
+ Py_DECREF(key);
+ Py_DECREF(value);
if (res < 0)
goto err;
- } else if (PyInt_Check(key)) {
- /* just keep integer keys */
- if (PyDict_SetItem(newtable, key, value) < 0)
- goto err;
- } else {
- PyErr_SetString(PyExc_TypeError, "items in translate table must be "
- "strings or integers");
+ }
+ /* create entries for deleting chars in z */
+ if (z != NULL) {
+ for (i = 0; i < PyUnicode_GET_SIZE(z); i++) {
+ key = PyInt_FromLong(PyUnicode_AS_UNICODE(z)[i]);
+ if (!key)
+ goto err;
+ res = PyDict_SetItem(new, key, Py_None);
+ Py_DECREF(key);
+ if (res < 0)
+ goto err;
+ }
+ }
+ } else {
+ /* x must be a dict */
+ if (!PyDict_Check(x)) {
+ PyErr_SetString(PyExc_TypeError, "if you give only one argument "
+ "to maketrans it must be a dict");
goto err;
}
+ /* copy entries into the new dict, converting string keys to int keys */
+ while (PyDict_Next(x, &i, &key, &value)) {
+ if (PyUnicode_Check(key)) {
+ /* convert string keys to integer keys */
+ PyObject *newkey;
+ if (PyUnicode_GET_SIZE(key) != 1) {
+ PyErr_SetString(PyExc_ValueError, "string keys in translate "
+ "table must be of length 1");
+ goto err;
+ }
+ newkey = PyInt_FromLong(PyUnicode_AS_UNICODE(key)[0]);
+ if (!newkey)
+ goto err;
+ res = PyDict_SetItem(new, newkey, value);
+ Py_DECREF(newkey);
+ if (res < 0)
+ goto err;
+ } else if (PyInt_Check(key)) {
+ /* just keep integer keys */
+ if (PyDict_SetItem(new, key, value) < 0)
+ goto err;
+ } else {
+ PyErr_SetString(PyExc_TypeError, "keys in translate table must "
+ "be strings or integers");
+ goto err;
+ }
+ }
}
-
- result = PyUnicode_TranslateCharmap(self->str,
- self->length,
- newtable,
- "ignore");
- Py_DECREF(newtable);
- return result;
+ return new;
err:
- Py_DECREF(newtable);
+ Py_DECREF(new);
return NULL;
}
+PyDoc_STRVAR(translate__doc__,
+"S.translate(table) -> unicode\n\
+\n\
+Return a copy of the string S, where all characters have been mapped\n\
+through the given translation table, which must be a mapping of\n\
+Unicode ordinals to Unicode ordinals, Unicode strings or None.\n\
+Unmapped characters are left untouched. Characters mapped to None\n\
+are deleted.");
+
+/* Translate self through table.  The table is handed to
+ * PyUnicode_TranslateCharmap() unchanged, with "ignore" as the errors
+ * argument; no key conversion is done here (unicode_maketrans() builds
+ * tables with integer keys). */
+static PyObject*
+unicode_translate(PyUnicodeObject *self, PyObject *table)
+{
+ return PyUnicode_TranslateCharmap(self->str, self->length, table, "ignore");
+}
+
PyDoc_STRVAR(upper__doc__,
"S.upper() -> unicode\n\
\n\
@@ -8076,6 +8132,8 @@ static PyMethodDef unicode_methods[] = {
{"__format__", (PyCFunction) unicode_unicode__format__, METH_VARARGS, p_format__doc__},
{"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
{"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
+ {"maketrans", (PyCFunction) unicode_maketrans,
+ METH_VARARGS | METH_STATIC, maketrans__doc__},
#if 0
{"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},
#endif