From abc387747dc573e05a4b31387797a0272062b2ef Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Sun, 12 Apr 2009 15:51:51 +0000 Subject: Add bytes/bytearray.maketrans() to mirror str.maketrans(), and deprecate string.maketrans() which actually works on bytes. (Also closes #5675.) --- Doc/library/stdtypes.rst | 27 +++++++++++++------ Doc/library/string.rst | 9 +++---- Include/bytes_methods.h | 4 +++ Lib/string.py | 3 +++ Lib/test/test_bigmem.py | 19 ++++++-------- Lib/test/test_bytes.py | 7 +++++ Lib/test/test_string.py | 8 ------ Misc/NEWS | 5 ++++ Objects/bytearrayobject.c | 9 +++++++ Objects/bytes_methods.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++ Objects/bytesobject.c | 8 ++++++ 11 files changed, 134 insertions(+), 32 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index a8c3146..72e2fb4 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -479,7 +479,7 @@ debugging, and in numerical work. exponent. -.. method:: float.fromhex(s) +.. classmethod:: float.fromhex(s) Class method to return the float represented by a hexadecimal string *s*. The string *s* may have leading and trailing @@ -967,7 +967,7 @@ functions based on regular expressions. 'example.com' -.. method:: str.maketrans(x[, y[, z]]) +.. staticmethod:: str.maketrans(x[, y[, z]]) This static method returns a translation table usable for :meth:`str.translate`. @@ -1514,8 +1514,8 @@ Wherever one of these methods needs to interpret the bytes as characters The bytes and bytearray types have an additional class method: -.. method:: bytes.fromhex(string) - bytearray.fromhex(string) +.. classmethod:: bytes.fromhex(string) + bytearray.fromhex(string) This :class:`bytes` class method returns a bytes or bytearray object, decoding the given string object. The string must contain two hexadecimal @@ -1524,7 +1524,9 @@ The bytes and bytearray types have an additional class method: >>> bytes.fromhex('f0 f1f2 ') b'\xf0\xf1\xf2' -The translate method differs in semantics from the version available on strings: + +The maketrans and translate methods differ in semantics from the versions +available on strings: .. method:: bytes.translate(table[, delete]) @@ -1533,8 +1535,7 @@ The translate method differs in semantics from the version available on strings: mapped through the given translation table, which must be a bytes object of length 256. - You can use the :func:`string.maketrans` helper function to create a - translation table. + You can use the :func:`bytes.maketrans` method to create a translation table. Set the *table* argument to ``None`` for translations that only delete characters:: @@ -1543,6 +1544,16 @@ The translate method differs in semantics from the version available on strings: b'rd ths shrt txt' +.. staticmethod:: bytes.maketrans(from, to) + + This static method returns a translation table usable for + :meth:`bytes.translate` that will map each character in *from* into the + character at the same position in *to*; *from* and *to* must be bytes objects + and have the same length. + + .. versionadded:: 3.1 + + .. _types-set: Set Types --- :class:`set`, :class:`frozenset` @@ -1847,7 +1858,7 @@ pairs within braces, for example: ``{'jack': 4098, 'sjoerd': 4127}`` or ``{4098: Return a shallow copy of the dictionary. - .. method:: fromkeys(seq[, value]) + .. classmethod:: fromkeys(seq[, value]) Create a new dictionary with keys from *seq* and values set to *value*. diff --git a/Doc/library/string.rst b/Doc/library/string.rst index 5867a5a..29bf160 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -548,13 +548,9 @@ rule: delimiter), and it should appear last in the regular expression. -String functions +Helper functions ---------------- -The following functions are available to operate on string objects. -They are not available as string methods. - - .. function:: capwords(s) Split the argument into words using :func:`split`, capitalize each word using @@ -568,3 +564,6 @@ They are not available as string methods. Return a translation table suitable for passing to :meth:`bytes.translate`, that will map each character in *from* into the character at the same position in *to*; *from* and *to* must have the same length. + + .. deprecated:: 3.1 + Use the :meth:`bytes.maketrans` static method instead. diff --git a/Include/bytes_methods.h b/Include/bytes_methods.h index 37518d2..e973261 100644 --- a/Include/bytes_methods.h +++ b/Include/bytes_methods.h @@ -20,6 +20,9 @@ extern void _Py_bytes_title(char *result, char *s, Py_ssize_t len); extern void _Py_bytes_capitalize(char *result, char *s, Py_ssize_t len); extern void _Py_bytes_swapcase(char *result, char *s, Py_ssize_t len); +/* This one gets the raw argument list. */ +extern PyObject* _Py_bytes_maketrans(PyObject *args); + /* Shared __doc__ strings. */ extern const char _Py_isspace__doc__[]; extern const char _Py_isalpha__doc__[]; @@ -33,6 +36,7 @@ extern const char _Py_upper__doc__[]; extern const char _Py_title__doc__[]; extern const char _Py_capitalize__doc__[]; extern const char _Py_swapcase__doc__[]; +extern const char _Py_maketrans__doc__[]; #define FLAG_LOWER 0x01 #define FLAG_UPPER 0x02 diff --git a/Lib/string.py b/Lib/string.py index ea0d359..8667c0e 100644 --- a/Lib/string.py +++ b/Lib/string.py @@ -49,6 +49,9 @@ def maketrans(frm: bytes, to: bytes) -> bytes: mapped to the byte at the same position in to. The strings frm and to must be of the same length. """ + import warnings + warnings.warn("string.maketrans is deprecated, use bytes.maketrans instead", + DeprecationWarning) if len(frm) != len(to): raise ValueError("maketrans arguments must have same length") if not (isinstance(frm, bytes) and isinstance(to, bytes)): diff --git a/Lib/test/test_bigmem.py b/Lib/test/test_bigmem.py index 7896748..091893e 100644 --- a/Lib/test/test_bigmem.py +++ b/Lib/test/test_bigmem.py @@ -418,18 +418,15 @@ class BaseStrTest: @bigmemtest(minsize=_2G, memuse=2) def test_translate(self, size): _ = self.from_latin1 - trans = { - ord(_('.')): _('-'), - ord(_('a')): _('!'), - ord(_('Z')): _('$'), - } SUBSTR = _('aZz.z.Aaz.') - if not isinstance(SUBSTR, str): - # Workaround the inexistence of bytes.maketrans() - chars = bytearray(range(256)) - for k, v in trans.items(): - chars[k] = ord(v) - trans = chars + if isinstance(SUBSTR, str): + trans = { + ord(_('.')): _('-'), + ord(_('a')): _('!'), + ord(_('Z')): _('$'), + } + else: + trans = bytes.maketrans(b'.aZ', b'-!$') sublen = len(SUBSTR) repeats = size // sublen + 2 s = SUBSTR * repeats diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index cb7fb46..a3ea40a 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -450,6 +450,13 @@ class BaseBytesTest(unittest.TestCase): self.assertEqual([ord(b[i:i+1]) for i in range(len(b))], [0, 65, 127, 128, 255]) + def test_maketrans(self): + transtable = b'\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377' + + self.assertEqual(self.type2test.maketrans(b'abc', b'xyz'), transtable) + self.assertRaises(ValueError, self.type2test.maketrans, b'abc', b'xyzq') + self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 'def') + class BytesTest(BaseBytesTest): type2test = bytes diff --git a/Lib/test/test_string.py b/Lib/test/test_string.py index 66f2204..16d1edb 100644 --- a/Lib/test/test_string.py +++ b/Lib/test/test_string.py @@ -101,14 +101,6 @@ class ModuleTest(unittest.TestCase): self.assertRaises(ValueError, fmt.format, "{0}", 10, 20, i=100) self.assertRaises(ValueError, fmt.format, "{i}", 10, 20, i=100) - - def test_maketrans(self): - transtable = b'\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377' - - self.assertEqual(string.maketrans(b'abc', b'xyz'), transtable) - self.assertRaises(ValueError, string.maketrans, b'abc', b'xyzq') - self.assertRaises(TypeError, string.maketrans, 'abc', 'def') - def test_main(): support.run_unittest(ModuleTest) diff --git a/Misc/NEWS b/Misc/NEWS index a08f320..b89516c 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,11 @@ What's New in Python 3.1 beta 1? Core and Builtins ----------------- +- The string.maketrans() function is deprecated; there is a new static method + maketrans() on the bytes and bytearray classes. This removes confusion about + the types string.maketrans() is supposed to work with, and mirrors the + methods available on the str class. + - Issue #2170: refactored xml.dom.minidom.normalize, increasing both its clarity and its speed. diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index bc8f39e..ca597a2 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -1451,6 +1451,13 @@ done: } +static PyObject * +bytes_maketrans(PyObject *null, PyObject *args) +{ + return _Py_bytes_maketrans(args); +} + + #define FORWARD 1 #define REVERSE -1 @@ -3131,6 +3138,8 @@ bytes_methods[] = { {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__}, {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__}, {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__}, + {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC, + _Py_maketrans__doc__}, {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__}, {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__}, {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__}, diff --git a/Objects/bytes_methods.c b/Objects/bytes_methods.c index 2d55601..403b131 100644 --- a/Objects/bytes_methods.c +++ b/Objects/bytes_methods.c @@ -608,3 +608,70 @@ _Py_bytes_swapcase(char *result, char *s, Py_ssize_t len) } } + +PyDoc_STRVAR_shared(_Py_maketrans__doc__, +"B.maketrans(frm, to) -> translation table\n\ +\n\ +Return a translation table (a bytes object of length 256)\n\ +suitable for use in bytes.translate where each byte in frm is\n\ +mapped to the byte at the same position in to.\n\ +The strings frm and to must be of the same length."); + +static Py_ssize_t +_getbuffer(PyObject *obj, Py_buffer *view) +{ + PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer; + + if (buffer == NULL || buffer->bf_getbuffer == NULL) + { + PyErr_Format(PyExc_TypeError, + "Type %.100s doesn't support the buffer API", + Py_TYPE(obj)->tp_name); + return -1; + } + + if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0) + return -1; + return view->len; +} + +PyObject * +_Py_bytes_maketrans(PyObject *args) +{ + PyObject *frm, *to, *res = NULL; + Py_buffer bfrm, bto; + int i; + char *p; + + bfrm.len = -1; + bto.len = -1; + + if (!PyArg_ParseTuple(args, "OO:maketrans", &frm, &to)) + return NULL; + if (_getbuffer(frm, &bfrm) < 0) + return NULL; + if (_getbuffer(to, &bto) < 0) + goto done; + if (bfrm.len != bto.len) { + PyErr_Format(PyExc_ValueError, + "maketrans arguments must have same length"); + goto done; + } + res = PyBytes_FromStringAndSize(NULL, 256); + if (!res) { + goto done; + } + p = PyBytes_AS_STRING(res); + for (i = 0; i < 256; i++) + p[i] = i; + for (i = 0; i < bfrm.len; i++) { + p[(int)((char *)bfrm.buf)[i]] = ((char *)bto.buf)[i]; + } + + done: + if (bfrm.len != -1) + PyBuffer_Release(&bfrm); + if (bto.len != -1) + PyBuffer_Release(&bto); + return res; +} diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index d5c2bea..1239680 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1950,6 +1950,12 @@ string_translate(PyBytesObject *self, PyObject *args) } +static PyObject * +string_maketrans(PyObject *null, PyObject *args) +{ + return _Py_bytes_maketrans(args); +} + #define FORWARD 1 #define REVERSE -1 @@ -2851,6 +2857,8 @@ string_methods[] = { {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__}, {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__}, {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__}, + {"maketrans", (PyCFunction)string_maketrans, METH_VARARGS|METH_STATIC, + _Py_maketrans__doc__}, {"partition", (PyCFunction)string_partition, METH_O, partition__doc__}, {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__}, {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__}, -- cgit v0.12