From d6fb104690cdeeea04ecbaf5c9bcafc622e03648 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 8 Apr 2022 08:36:20 -0500 Subject: Fix bad grammar and import docstring for split/rsplit (GH-32381) --- Objects/clinic/unicodeobject.c.h | 34 +++++++++++++++++++++------------- Objects/unicodeobject.c | 25 ++++++++++++++++--------- 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/Objects/clinic/unicodeobject.c.h b/Objects/clinic/unicodeobject.c.h index 9ef8ce2..803b5f2 100644 --- a/Objects/clinic/unicodeobject.c.h +++ b/Objects/clinic/unicodeobject.c.h @@ -858,15 +858,21 @@ PyDoc_STRVAR(unicode_split__doc__, "split($self, /, sep=None, maxsplit=-1)\n" "--\n" "\n" -"Return a list of the words in the string, using sep as the delimiter string.\n" +"Return a list of the substrings in the string, using sep as the separator string.\n" "\n" " sep\n" -" The delimiter according which to split the string.\n" -" None (the default value) means split according to any whitespace,\n" -" and discard empty strings from the result.\n" +" The separator used to split the string.\n" +"\n" +" When set to None (the default value), will split on any whitespace\n" +" character (including \\\\n \\\\r \\\\t \\\\f and spaces) and will discard\n" +" empty strings from the result.\n" " maxsplit\n" -" Maximum number of splits to do.\n" -" -1 (the default value) means no limit."); +" Maximum number of splits (starting from the left).\n" +" -1 (the default value) means no limit.\n" +"\n" +"Note, str.split() is mainly useful for data that has been intentionally\n" +"delimited. With natural text that includes punctuation, consider using\n" +"the regular expression module."); #define UNICODE_SPLIT_METHODDEF \ {"split", (PyCFunction)(void(*)(void))unicode_split, METH_FASTCALL|METH_KEYWORDS, unicode_split__doc__}, @@ -953,17 +959,19 @@ PyDoc_STRVAR(unicode_rsplit__doc__, "rsplit($self, /, sep=None, maxsplit=-1)\n" "--\n" "\n" -"Return a list of the words in the string, using sep as the delimiter string.\n" +"Return a list of the substrings in the string, using sep as the separator string.\n" "\n" " sep\n" -" The delimiter according which to split the string.\n" -" None (the default value) means split according to any whitespace,\n" -" and discard empty strings from the result.\n" +" The separator used to split the string.\n" +"\n" +" When set to None (the default value), will split on any whitespace\n" +" character (including \\\\n \\\\r \\\\t \\\\f and spaces) and will discard\n" +" empty strings from the result.\n" " maxsplit\n" -" Maximum number of splits to do.\n" +" Maximum number of splits (starting from the left).\n" " -1 (the default value) means no limit.\n" "\n" -"Splits are done starting at the end of the string and working to the front."); +"Splitting starts at the end of the string and works to the front."); #define UNICODE_RSPLIT_METHODDEF \ {"rsplit", (PyCFunction)(void(*)(void))unicode_rsplit, METH_FASTCALL|METH_KEYWORDS, unicode_rsplit__doc__}, @@ -1327,4 +1335,4 @@ skip_optional_pos: exit: return return_value; } -/*[clinic end generated code: output=f10cf85d3935b3b7 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=c494bed46209961d input=a9049054013a1b77]*/ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2d40963..1b2f332 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13155,19 +13155,26 @@ PyUnicode_Split(PyObject *s, PyObject *sep, Py_ssize_t maxsplit) str.split as unicode_split sep: object = None - The delimiter according which to split the string. - None (the default value) means split according to any whitespace, - and discard empty strings from the result. + The separator used to split the string. + + When set to None (the default value), will split on any whitespace + character (including \\n \\r \\t \\f and spaces) and will discard + empty strings from the result. maxsplit: Py_ssize_t = -1 - Maximum number of splits to do. + Maximum number of splits (starting from the left). -1 (the default value) means no limit. -Return a list of the words in the string, using sep as the delimiter string. +Return a list of the substrings in the string, using sep as the separator string. + +Note, str.split() is mainly useful for data that has been intentionally +delimited. With natural text that includes punctuation, consider using +the regular expression module. + [clinic start generated code]*/ static PyObject * unicode_split_impl(PyObject *self, PyObject *sep, Py_ssize_t maxsplit) -/*[clinic end generated code: output=3a65b1db356948dc input=606e750488a82359]*/ +/*[clinic end generated code: output=3a65b1db356948dc input=906d953b44efc43b]*/ { if (sep == Py_None) return split(self, NULL, maxsplit); @@ -13338,14 +13345,14 @@ PyUnicode_RSplit(PyObject *s, PyObject *sep, Py_ssize_t maxsplit) /*[clinic input] str.rsplit as unicode_rsplit = str.split -Return a list of the words in the string, using sep as the delimiter string. +Return a list of the substrings in the string, using sep as the separator string. -Splits are done starting at the end of the string and working to the front. +Splitting starts at the end of the string and works to the front. [clinic start generated code]*/ static PyObject * unicode_rsplit_impl(PyObject *self, PyObject *sep, Py_ssize_t maxsplit) -/*[clinic end generated code: output=c2b815c63bcabffc input=12ad4bf57dd35f15]*/ +/*[clinic end generated code: output=c2b815c63bcabffc input=ea78406060fce33c]*/ { if (sep == Py_None) return rsplit(self, NULL, maxsplit); -- cgit v0.12