From 332d7217509443c01ba5c3da41053786b49015fa Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Fri, 18 Sep 2009 21:14:55 +0000 Subject: add keyword arguments support to str/unicode encode and decode #6300 --- Doc/library/stdtypes.rst | 8 ++++++-- Lib/test/test_str.py | 11 +++++++++++ Lib/test/test_unicode.py | 8 ++++++++ Misc/ACKS | 1 + Misc/NEWS | 3 +++ Objects/stringobject.c | 16 ++++++++++------ Objects/unicodeobject.c | 16 ++++++++++------ 7 files changed, 49 insertions(+), 14 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 93ed710..d2dfd11 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -815,8 +815,8 @@ String Methods .. index:: pair: string; methods -Below are listed the string methods which both 8-bit strings and Unicode objects -support. Note that none of these methods take keyword arguments. +Below are listed the string methods which both 8-bit strings and +Unicode objects support. In addition, Python's strings support the sequence type methods described in the :ref:`typesseq` section. To output formatted strings @@ -861,6 +861,8 @@ string functions based on regular expressions. .. versionchanged:: 2.3 Support for other error handling schemes added. + .. versionchanged:: 2.7 + Support for keyword arguments added. .. method:: str.encode([encoding[,errors]]) @@ -879,6 +881,8 @@ string functions based on regular expressions. Support for ``'xmlcharrefreplace'`` and ``'backslashreplace'`` and other error handling schemes added. + .. versionchanged:: 2.7 + Support for keyword arguments added. .. 
method:: str.endswith(suffix[, start[, end]]) diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py index c546154..fa86db4 100644 --- a/Lib/test/test_str.py +++ b/Lib/test/test_str.py @@ -401,6 +401,17 @@ class StrTest( def test_buffer_is_readonly(self): self.assertRaises(TypeError, sys.stdin.readinto, b"") + def test_encode_and_decode_kwargs(self): + self.assertEqual('abcde'.encode('ascii', 'replace'), + 'abcde'.encode('ascii', errors='replace')) + self.assertEqual('abcde'.encode('ascii', 'ignore'), + 'abcde'.encode(encoding='ascii', errors='ignore')) + self.assertEqual('Andr\202 x'.decode('ascii', 'ignore'), + 'Andr\202 x'.decode('ascii', errors='ignore')) + self.assertEqual('Andr\202 x'.decode('ascii', 'replace'), + 'Andr\202 x'.decode(encoding='ascii', errors='replace')) + + def test_main(): test_support.run_unittest(StrTest) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 43830b2..2b269cc 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -593,12 +593,20 @@ class UnicodeTest( self.assertRaises(UnicodeError, u'Andr\202 x'.encode, 'ascii','strict') self.assertEqual(u'Andr\202 x'.encode('ascii','ignore'), "Andr x") self.assertEqual(u'Andr\202 x'.encode('ascii','replace'), "Andr? 
x") + self.assertEqual(u'Andr\202 x'.encode('ascii', 'replace'), + u'Andr\202 x'.encode('ascii', errors='replace')) + self.assertEqual(u'Andr\202 x'.encode('ascii', 'ignore'), + u'Andr\202 x'.encode(encoding='ascii', errors='ignore')) # Error handling (decoding) self.assertRaises(UnicodeError, unicode, 'Andr\202 x', 'ascii') self.assertRaises(UnicodeError, unicode, 'Andr\202 x', 'ascii','strict') self.assertEqual(unicode('Andr\202 x','ascii','ignore'), u"Andr x") self.assertEqual(unicode('Andr\202 x','ascii','replace'), u'Andr\uFFFD x') + self.assertEqual(u'abcde'.decode('ascii', 'ignore'), + u'abcde'.decode('ascii', errors='ignore')) + self.assertEqual(u'abcde'.decode('ascii', 'replace'), + u'abcde'.decode(encoding='ascii', errors='replace')) # Error handling (unknown character names) self.assertEqual("\\N{foo}xx".decode("unicode-escape", "ignore"), u"xx") diff --git a/Misc/ACKS b/Misc/ACKS index 53e386a..e0bd435 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -88,6 +88,7 @@ Peter Bosch Eric Bouck Thierry Bousch Sebastian Boving +Jeff Bradberry Monty Brandenberg Georg Brandl Christopher Brannon diff --git a/Misc/NEWS b/Misc/NEWS index 0ce1168..66ee553 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,9 @@ What's New in Python 2.7 alpha 1 Core and Builtins ----------------- +- Issue #6300: unicode.encode, unicode.decode, str.decode, and str.encode now + take keyword arguments. + - Issue #6922: Fix an infinite loop when trying to decode an invalid UTF-32 stream with a non-raising error handler like "replace" or "ignore". diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 6d933a1..1233fc0 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -3332,13 +3332,15 @@ a UnicodeEncodeError. 
Other possible values are 'ignore', 'replace' and\n\ codecs.register_error that is able to handle UnicodeEncodeErrors."); static PyObject * -string_encode(PyStringObject *self, PyObject *args) +string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs) { + static char *kwlist[] = {"encoding", "errors", 0}; char *encoding = NULL; char *errors = NULL; PyObject *v; - if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode", + kwlist, &encoding, &errors)) return NULL; v = PyString_AsEncodedObject((PyObject *)self, encoding, errors); if (v == NULL) @@ -3369,13 +3371,15 @@ as well as any other name registered with codecs.register_error that is\n\ able to handle UnicodeDecodeErrors."); static PyObject * -string_decode(PyStringObject *self, PyObject *args) +string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs) { + static char *kwlist[] = {"encoding", "errors", 0}; char *encoding = NULL; char *errors = NULL; PyObject *v; - if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", + kwlist, &encoding, &errors)) return NULL; v = PyString_AsDecodedObject((PyObject *)self, encoding, errors); if (v == NULL) @@ -4053,8 +4057,8 @@ string_methods[] = { {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__}, {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS}, {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS}, - {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__}, - {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__}, + {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__}, + {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__}, {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, expandtabs__doc__}, {"splitlines", (PyCFunction)string_splitlines, 
METH_VARARGS, diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 6164510..2028d96 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6610,13 +6610,15 @@ a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\ codecs.register_error that can handle UnicodeEncodeErrors."); static PyObject * -unicode_encode(PyUnicodeObject *self, PyObject *args) +unicode_encode(PyUnicodeObject *self, PyObject *args, PyObject *kwargs) { + static char *kwlist[] = {"encoding", "errors", 0}; char *encoding = NULL; char *errors = NULL; PyObject *v; - if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode", + kwlist, &encoding, &errors)) return NULL; v = PyUnicode_AsEncodedObject((PyObject *)self, encoding, errors); if (v == NULL) @@ -6646,13 +6648,15 @@ as well as any other name registerd with codecs.register_error that is\n\ able to handle UnicodeDecodeErrors."); static PyObject * -unicode_decode(PyUnicodeObject *self, PyObject *args) +unicode_decode(PyUnicodeObject *self, PyObject *args, PyObject *kwargs) { + static char *kwlist[] = {"encoding", "errors", 0}; char *encoding = NULL; char *errors = NULL; PyObject *v; - if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", + kwlist, &encoding, &errors)) return NULL; v = PyUnicode_AsDecodedObject((PyObject *)self, encoding, errors); if (v == NULL) @@ -8054,7 +8058,7 @@ static PyMethodDef unicode_methods[] = { /* Order is according to common usage: often used methods should appear first, since lookup is done sequentially. 
*/ - {"encode", (PyCFunction) unicode_encode, METH_VARARGS, encode__doc__}, + {"encode", (PyCFunction) unicode_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__}, {"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__}, {"split", (PyCFunction) unicode_split, METH_VARARGS, split__doc__}, {"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS, rsplit__doc__}, @@ -8070,7 +8074,7 @@ static PyMethodDef unicode_methods[] = { {"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__}, {"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__}, {"lstrip", (PyCFunction) unicode_lstrip, METH_VARARGS, lstrip__doc__}, - {"decode", (PyCFunction) unicode_decode, METH_VARARGS, decode__doc__}, + {"decode", (PyCFunction) unicode_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__}, /* {"maketrans", (PyCFunction) unicode_maketrans, METH_VARARGS, maketrans__doc__}, */ {"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__}, {"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__}, -- cgit v0.12