#14081: The sep and maxsplit parameters to str.split, bytes.split, and bytearray.split may now be passed as keyword arguments.

author: Ezio Melotti <ezio.melotti@gmail.com> 2012-02-26 07:39:55 (GMT)
committer: Ezio Melotti <ezio.melotti@gmail.com> 2012-02-26 07:39:55 (GMT)
commit: cda6b6d60d96e6f755da92deb5e4066839095791 (patch)
tree: 0835098963d975f54d46707bae270e08df2fc4fa
parent: 408026c7e8c019cf04372a4267c832241e18c62c (diff)
download: cpython-cda6b6d60d96e6f755da92deb5e4066839095791.zip
cpython-cda6b6d60d96e6f755da92deb5e4066839095791.tar.gz
cpython-cda6b6d60d96e6f755da92deb5e4066839095791.tar.bz2
7 files changed, 80 insertions, 27 deletions
diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst
index 183b2f7..be06595 100644
--- a/Doc/library/stdtypes.rst
+++ b/Doc/library/stdtypes.rst
@@ -1301,7 +1301,7 @@ functions based on regular expressions.
    two empty strings, followed by the string itself.
 
 
-.. method:: str.rsplit([sep[, maxsplit]])
+.. method:: str.rsplit(sep=None, maxsplit=-1)
 
    Return a list of the words in the string, using *sep* as the delimiter string.
    If *maxsplit* is given, at most *maxsplit* splits are done, the *rightmost*
@@ -1323,7 +1323,7 @@ functions based on regular expressions.
       'mississ'
 
 
-.. method:: str.split([sep[, maxsplit]])
+.. method:: str.split(sep=None, maxsplit=-1)
 
    Return a list of the words in the string, using *sep* as the delimiter
    string.  If *maxsplit* is given, at most *maxsplit* splits are done (thus,
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
index feeb4ce..b7246eb 100644
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -56,7 +56,7 @@ class BaseTest(unittest.TestCase):
         result = self.fixtype(result)
         obj = self.fixtype(obj)
         args = self.fixtype(args)
-        kwargs = self.fixtype(kwargs)
+        kwargs = {k: self.fixtype(v) for k,v in kwargs.items()}
         realresult = getattr(obj, methodname)(*args, **kwargs)
         self.assertEqual(
             result,
@@ -389,6 +389,17 @@ class BaseTest(unittest.TestCase):
         self.checkequal(['a']*18 + ['aBLAHa'], ('aBLAH'*20)[:-4],
                         'split', 'BLAH', 18)
 
+        # with keyword args
+        self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', sep='|')
+        self.checkequal(['a', 'b|c|d'],
+                        'a|b|c|d', 'split', '|', maxsplit=1)
+        self.checkequal(['a', 'b|c|d'],
+                        'a|b|c|d', 'split', sep='|', maxsplit=1)
+        self.checkequal(['a', 'b|c|d'],
+                        'a|b|c|d', 'split', maxsplit=1, sep='|')
+        self.checkequal(['a', 'b c d'],
+                        'a b c d', 'split', maxsplit=1)
+
         # argument type
         self.checkraises(TypeError, 'hello', 'split', 42, 42, 42)
 
@@ -446,6 +457,17 @@ class BaseTest(unittest.TestCase):
         self.checkequal(['aBLAHa'] + ['a']*18, ('aBLAH'*20)[:-4],
                         'rsplit', 'BLAH', 18)
 
+        # with keyword args
+        self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', sep='|')
+        self.checkequal(['a|b|c', 'd'],
+                        'a|b|c|d', 'rsplit', '|', maxsplit=1)
+        self.checkequal(['a|b|c', 'd'],
+                        'a|b|c|d', 'rsplit', sep='|', maxsplit=1)
+        self.checkequal(['a|b|c', 'd'],
+                        'a|b|c|d', 'rsplit', maxsplit=1, sep='|')
+        self.checkequal(['a b c', 'd'],
+                        'a b c d', 'rsplit', maxsplit=1)
+
         # argument type
         self.checkraises(TypeError, 'hello', 'rsplit', 42, 42, 42)
 
diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py
index bfb88de..203fc5c 100644
--- a/Lib/test/test_bytes.py
+++ b/Lib/test/test_bytes.py
@@ -435,6 +435,14 @@ class BaseBytesTest(unittest.TestCase):
         self.assertEqual(b.split(b'i'), [b'm', b'ss', b'ss', b'pp', b''])
         self.assertEqual(b.split(b'ss'), [b'mi', b'i', b'ippi'])
         self.assertEqual(b.split(b'w'), [b])
+        # with keyword args
+        b = self.type2test(b'a|b|c|d')
+        self.assertEqual(b.split(sep=b'|'), [b'a', b'b', b'c', b'd'])
+        self.assertEqual(b.split(b'|', maxsplit=1), [b'a', b'b|c|d'])
+        self.assertEqual(b.split(sep=b'|', maxsplit=1), [b'a', b'b|c|d'])
+        self.assertEqual(b.split(maxsplit=1, sep=b'|'), [b'a', b'b|c|d'])
+        b = self.type2test(b'a b c d')
+        self.assertEqual(b.split(maxsplit=1), [b'a', b'b c d'])
 
     def test_split_whitespace(self):
         for b in (b'  arf  barf  ', b'arf\tbarf', b'arf\nbarf', b'arf\rbarf',
@@ -463,6 +471,14 @@ class BaseBytesTest(unittest.TestCase):
         self.assertEqual(b.rsplit(b'i'), [b'm', b'ss', b'ss', b'pp', b''])
         self.assertEqual(b.rsplit(b'ss'), [b'mi', b'i', b'ippi'])
         self.assertEqual(b.rsplit(b'w'), [b])
+        # with keyword args
+        b = self.type2test(b'a|b|c|d')
+        self.assertEqual(b.rsplit(sep=b'|'), [b'a', b'b', b'c', b'd'])
+        self.assertEqual(b.rsplit(b'|', maxsplit=1), [b'a|b|c', b'd'])
+        self.assertEqual(b.rsplit(sep=b'|', maxsplit=1), [b'a|b|c', b'd'])
+        self.assertEqual(b.rsplit(maxsplit=1, sep=b'|'), [b'a|b|c', b'd'])
+        b = self.type2test(b'a b c d')
+        self.assertEqual(b.rsplit(maxsplit=1), [b'a b c', b'd'])
 
     def test_rsplit_whitespace(self):
         for b in (b'  arf  barf  ', b'arf\tbarf', b'arf\nbarf', b'arf\rbarf',
diff --git a/Misc/NEWS b/Misc/NEWS
index 3565203..bc9fe5d 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -223,6 +223,9 @@ Core and Builtins
 - PEP 393: flexible string representation. Thanks to Torsten Becker for the
   initial implementation, and Victor Stinner for various bug fixes.
 
+- Issue #14081: The 'sep' and 'maxsplit' parameters to str.split, bytes.split,
+  and bytearray.split may now be passed as keyword arguments.
+
 - Issue #13012: The 'keepends' parameter to str.splitlines may now be passed
   as a keyword argument:  "my_string.splitlines(keepends=True)".  The same
   change also applies to bytes.splitlines and bytearray.splitlines.
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c
index 2571858..1b88f12 100644
--- a/Objects/bytearrayobject.c
+++ b/Objects/bytearrayobject.c
@@ -2039,7 +2039,7 @@ bytearray_replace(PyByteArrayObject *self, PyObject *args)
 }
 
 PyDoc_STRVAR(split__doc__,
-"B.split([sep[, maxsplit]]) -> list of bytearrays\n\
+"B.split(sep=None, maxsplit=-1) -> list of bytearrays\n\
 \n\
 Return a list of the sections in B, using sep as the delimiter.\n\
 If sep is not given, B is split on ASCII whitespace characters\n\
@@ -2047,15 +2047,17 @@ If sep is not given, B is split on ASCII whitespace characters\n\
 If maxsplit is given, at most maxsplit splits are done.");
 
 static PyObject *
-bytearray_split(PyByteArrayObject *self, PyObject *args)
+bytearray_split(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
 {
+    static char *kwlist[] = {"sep", "maxsplit", 0};
     Py_ssize_t len = PyByteArray_GET_SIZE(self), n;
     Py_ssize_t maxsplit = -1;
     const char *s = PyByteArray_AS_STRING(self), *sub;
     PyObject *list, *subobj = Py_None;
     Py_buffer vsub;
 
-    if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split",
+                                     kwlist, &subobj, &maxsplit))
         return NULL;
     if (maxsplit < 0)
         maxsplit = PY_SSIZE_T_MAX;
@@ -2131,7 +2133,7 @@ bytearray_rpartition(PyByteArrayObject *self, PyObject *sep_obj)
 }
 
 PyDoc_STRVAR(rsplit__doc__,
-"B.rsplit(sep[, maxsplit]) -> list of bytearrays\n\
+"B.rsplit(sep=None, maxsplit=-1) -> list of bytearrays\n\
 \n\
 Return a list of the sections in B, using sep as the delimiter,\n\
 starting at the end of B and working to the front.\n\
@@ -2140,15 +2142,17 @@ If sep is not given, B is split on ASCII whitespace characters\n\
 If maxsplit is given, at most maxsplit splits are done.");
 
 static PyObject *
-bytearray_rsplit(PyByteArrayObject *self, PyObject *args)
+bytearray_rsplit(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
 {
+    static char *kwlist[] = {"sep", "maxsplit", 0};
     Py_ssize_t len = PyByteArray_GET_SIZE(self), n;
     Py_ssize_t maxsplit = -1;
     const char *s = PyByteArray_AS_STRING(self), *sub;
     PyObject *list, *subobj = Py_None;
     Py_buffer vsub;
 
-    if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit",
+                                     kwlist, &subobj, &maxsplit))
         return NULL;
     if (maxsplit < 0)
         maxsplit = PY_SSIZE_T_MAX;
@@ -2869,9 +2873,9 @@ bytearray_methods[] = {
     {"rindex", (PyCFunction)bytearray_rindex, METH_VARARGS, rindex__doc__},
     {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
     {"rpartition", (PyCFunction)bytearray_rpartition, METH_O, rpartition__doc__},
-    {"rsplit", (PyCFunction)bytearray_rsplit, METH_VARARGS, rsplit__doc__},
+    {"rsplit", (PyCFunction)bytearray_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__},
     {"rstrip", (PyCFunction)bytearray_rstrip, METH_VARARGS, rstrip__doc__},
-    {"split", (PyCFunction)bytearray_split, METH_VARARGS, split__doc__},
+    {"split", (PyCFunction)bytearray_split, METH_VARARGS | METH_KEYWORDS, split__doc__},
     {"splitlines", (PyCFunction)bytearray_splitlines,
      METH_VARARGS | METH_KEYWORDS, splitlines__doc__},
     {"startswith", (PyCFunction)bytearray_startswith, METH_VARARGS ,
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index a98cdcf..1d2fed7 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -972,7 +972,7 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
 #define STRIPNAME(i) (stripformat[i]+3)
 
 PyDoc_STRVAR(split__doc__,
-"B.split([sep[, maxsplit]]) -> list of bytes\n\
+"B.split(sep=None, maxsplit=-1) -> list of bytes\n\
 \n\
 Return a list of the sections in B, using sep as the delimiter.\n\
 If sep is not specified or is None, B is split on ASCII whitespace\n\
@@ -980,15 +980,17 @@ characters (space, tab, return, newline, formfeed, vertical tab).\n\
 If maxsplit is given, at most maxsplit splits are done.");
 
 static PyObject *
-bytes_split(PyBytesObject *self, PyObject *args)
+bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds)
 {
+    static char *kwlist[] = {"sep", "maxsplit", 0};
     Py_ssize_t len = PyBytes_GET_SIZE(self), n;
     Py_ssize_t maxsplit = -1;
     const char *s = PyBytes_AS_STRING(self), *sub;
     Py_buffer vsub;
     PyObject *list, *subobj = Py_None;
 
-    if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split",
+                                     kwlist, &subobj, &maxsplit))
         return NULL;
     if (maxsplit < 0)
         maxsplit = PY_SSIZE_T_MAX;
@@ -1060,7 +1062,7 @@ bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
 }
 
 PyDoc_STRVAR(rsplit__doc__,
-"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
+"B.rsplit(sep=None, maxsplit=-1) -> list of bytes\n\
 \n\
 Return a list of the sections in B, using sep as the delimiter,\n\
 starting at the end of B and working to the front.\n\
@@ -1070,15 +1072,17 @@ If maxsplit is given, at most maxsplit splits are done.");
 
 
 static PyObject *
-bytes_rsplit(PyBytesObject *self, PyObject *args)
+bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds)
 {
+    static char *kwlist[] = {"sep", "maxsplit", 0};
     Py_ssize_t len = PyBytes_GET_SIZE(self), n;
     Py_ssize_t maxsplit = -1;
     const char *s = PyBytes_AS_STRING(self), *sub;
     Py_buffer vsub;
     PyObject *list, *subobj = Py_None;
 
-    if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit",
+                                     kwlist, &subobj, &maxsplit))
         return NULL;
     if (maxsplit < 0)
         maxsplit = PY_SSIZE_T_MAX;
@@ -2470,9 +2474,9 @@ bytes_methods[] = {
     {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
     {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
      rpartition__doc__},
-    {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
+    {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__},
     {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
-    {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
+    {"split", (PyCFunction)bytes_split, METH_VARARGS | METH_KEYWORDS, split__doc__},
     {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,
      splitlines__doc__},
     {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 7cc6b1b..a4dcdf6 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -12499,7 +12499,7 @@ PyUnicode_Split(PyObject *s, PyObject *sep, Py_ssize_t maxsplit)
 }
 
 PyDoc_STRVAR(split__doc__,
-             "S.split([sep[, maxsplit]]) -> list of strings\n\
+             "S.split(sep=None, maxsplit=-1) -> list of strings\n\
 \n\
 Return a list of the words in S, using sep as the\n\
 delimiter string.  If maxsplit is given, at most maxsplit\n\
@@ -12508,12 +12508,14 @@ whitespace string is a separator and empty strings are\n\
 removed from the result.");
 
 static PyObject*
-unicode_split(PyObject *self, PyObject *args)
+unicode_split(PyObject *self, PyObject *args, PyObject *kwds)
 {
+    static char *kwlist[] = {"sep", "maxsplit", 0};
     PyObject *substring = Py_None;
     Py_ssize_t maxcount = -1;
 
-    if (!PyArg_ParseTuple(args, "|On:split", &substring, &maxcount))
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split",
+                                     kwlist, &substring, &maxcount))
         return NULL;
 
     if (substring == Py_None)
@@ -12722,7 +12724,7 @@ PyUnicode_RSplit(PyObject *s, PyObject *sep, Py_ssize_t maxsplit)
 }
 
 PyDoc_STRVAR(rsplit__doc__,
-             "S.rsplit([sep[, maxsplit]]) -> list of strings\n\
+             "S.rsplit(sep=None, maxsplit=-1) -> list of strings\n\
 \n\
 Return a list of the words in S, using sep as the\n\
 delimiter string, starting at the end of the string and\n\
@@ -12731,12 +12733,14 @@ splits are done. If sep is not specified, any whitespace string\n\
 is a separator.");
 
 static PyObject*
-unicode_rsplit(PyObject *self, PyObject *args)
+unicode_rsplit(PyObject *self, PyObject *args, PyObject *kwds)
 {
+    static char *kwlist[] = {"sep", "maxsplit", 0};
     PyObject *substring = Py_None;
     Py_ssize_t maxcount = -1;
 
-    if (!PyArg_ParseTuple(args, "|On:rsplit", &substring, &maxcount))
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit",
+                                     kwlist, &substring, &maxcount))
         return NULL;
 
     if (substring == Py_None)
@@ -13167,8 +13171,8 @@ static PyMethodDef unicode_methods[] = {
 
     {"encode", (PyCFunction) unicode_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
     {"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__},
-    {"split", (PyCFunction) unicode_split, METH_VARARGS, split__doc__},
-    {"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS, rsplit__doc__},
+    {"split", (PyCFunction) unicode_split, METH_VARARGS | METH_KEYWORDS, split__doc__},
+    {"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__},
     {"join", (PyCFunction) unicode_join, METH_O, join__doc__},
     {"capitalize", (PyCFunction) unicode_capitalize, METH_NOARGS, capitalize__doc__},
     {"casefold", (PyCFunction) unicode_casefold, METH_NOARGS, casefold__doc__},
author	Ezio Melotti <ezio.melotti@gmail.com>	2012-02-26 07:39:55 (GMT)
committer	Ezio Melotti <ezio.melotti@gmail.com>	2012-02-26 07:39:55 (GMT)
commit	cda6b6d60d96e6f755da92deb5e4066839095791 (patch)
tree	0835098963d975f54d46707bae270e08df2fc4fa
parent	408026c7e8c019cf04372a4267c832241e18c62c (diff)
download	cpython-cda6b6d60d96e6f755da92deb5e4066839095791.zip cpython-cda6b6d60d96e6f755da92deb5e4066839095791.tar.gz cpython-cda6b6d60d96e6f755da92deb5e4066839095791.tar.bz2