summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/library/re.rst24
-rw-r--r--Doc/whatsnew/3.13.rst7
-rw-r--r--Lib/re/__init__.py67
-rw-r--r--Lib/test/test_re.py71
-rw-r--r--Misc/NEWS.d/next/Library/2023-08-08-16-09-59.gh-issue-56166.WUMhYG.rst3
5 files changed, 153 insertions, 19 deletions
diff --git a/Doc/library/re.rst b/Doc/library/re.rst
index 3f03f03..ab201e2 100644
--- a/Doc/library/re.rst
+++ b/Doc/library/re.rst
@@ -898,7 +898,7 @@ Functions
['Words', 'words', 'words', '']
>>> re.split(r'(\W+)', 'Words, words, words.')
['Words', ', ', 'words', ', ', 'words', '.', '']
- >>> re.split(r'\W+', 'Words, words, words.', 1)
+ >>> re.split(r'\W+', 'Words, words, words.', maxsplit=1)
['Words', 'words, words.']
>>> re.split('[a-f]+', '0a3B9', flags=re.IGNORECASE)
['0', '3', '9']
@@ -929,6 +929,11 @@ Functions
.. versionchanged:: 3.7
Added support of splitting on a pattern that could match an empty string.
+ .. deprecated:: 3.13
+ Passing *maxsplit* and *flags* as positional arguments is deprecated.
+ In future Python versions they will be
+ :ref:`keyword-only parameters <keyword-only_parameter>`.
+
.. function:: findall(pattern, string, flags=0)
@@ -1027,8 +1032,6 @@ Functions
.. versionchanged:: 3.7
Unknown escapes in *repl* consisting of ``'\'`` and an ASCII letter
now are errors.
-
- .. versionchanged:: 3.7
Empty matches for the pattern are replaced when adjacent to a previous
non-empty match.
@@ -1037,18 +1040,17 @@ Functions
In :class:`bytes` replacement strings, group *name* can only contain bytes
in the ASCII range (``b'\x00'``-``b'\x7f'``).
+ .. deprecated:: 3.13
+ Passing *count* and *flags* as positional arguments is deprecated.
+ In future Python versions they will be
+ :ref:`keyword-only parameters <keyword-only_parameter>`.
+
.. function:: subn(pattern, repl, string, count=0, flags=0)
Perform the same operation as :func:`sub`, but return a tuple ``(new_string,
number_of_subs_made)``.
- .. versionchanged:: 3.1
- Added the optional flags argument.
-
- .. versionchanged:: 3.5
- Unmatched groups are replaced with an empty string.
-
.. function:: escape(pattern)
@@ -1656,7 +1658,7 @@ because the address has spaces, our splitting pattern, in it:
.. doctest::
:options: +NORMALIZE_WHITESPACE
- >>> [re.split(":? ", entry, 3) for entry in entries]
+ >>> [re.split(":? ", entry, maxsplit=3) for entry in entries]
[['Ross', 'McFluff', '834.345.1254', '155 Elm Street'],
['Ronald', 'Heathmore', '892.345.3428', '436 Finley Avenue'],
['Frank', 'Burger', '925.541.7625', '662 South Dogwood Way'],
@@ -1669,7 +1671,7 @@ house number from the street name:
.. doctest::
:options: +NORMALIZE_WHITESPACE
- >>> [re.split(":? ", entry, 4) for entry in entries]
+ >>> [re.split(":? ", entry, maxsplit=4) for entry in entries]
[['Ross', 'McFluff', '834.345.1254', '155', 'Elm Street'],
['Ronald', 'Heathmore', '892.345.3428', '436', 'Finley Avenue'],
['Frank', 'Burger', '925.541.7625', '662', 'South Dogwood Way'],
diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst
index 84ffd84..519dee5 100644
--- a/Doc/whatsnew/3.13.rst
+++ b/Doc/whatsnew/3.13.rst
@@ -832,6 +832,13 @@ Porting to Python 3.13
Deprecated
----------
+* Passing optional arguments *maxsplit*, *count* and *flags* in module-level
+ functions :func:`re.split`, :func:`re.sub` and :func:`re.subn` as positional
+ arguments is now deprecated.
+ In future Python versions these parameters will be
+ :ref:`keyword-only <keyword-only_parameter>`.
+ (Contributed by Serhiy Storchaka in :gh:`56166`.)
+
* Deprecate the old ``Py_UNICODE`` and ``PY_UNICODE_TYPE`` types: use directly
the :c:type:`wchar_t` type instead. Since Python 3.3, ``Py_UNICODE`` and
``PY_UNICODE_TYPE`` are just aliases to :c:type:`wchar_t`.
diff --git a/Lib/re/__init__.py b/Lib/re/__init__.py
index d6fccd5..428d1b0 100644
--- a/Lib/re/__init__.py
+++ b/Lib/re/__init__.py
@@ -175,16 +175,39 @@ def search(pattern, string, flags=0):
a Match object, or None if no match was found."""
return _compile(pattern, flags).search(string)
-def sub(pattern, repl, string, count=0, flags=0):
+class _ZeroSentinel(int):
+ pass
+_zero_sentinel = _ZeroSentinel()
+
+def sub(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel):
"""Return the string obtained by replacing the leftmost
non-overlapping occurrences of the pattern in string by the
replacement repl. repl can be either a string or a callable;
if a string, backslash escapes in it are processed. If it is
a callable, it's passed the Match object and must return
a replacement string to be used."""
+ if args:
+ if count is not _zero_sentinel:
+ raise TypeError("sub() got multiple values for argument 'count'")
+ count, *args = args
+ if args:
+ if flags is not _zero_sentinel:
+ raise TypeError("sub() got multiple values for argument 'flags'")
+ flags, *args = args
+ if args:
+ raise TypeError("sub() takes from 3 to 5 positional arguments "
+ "but %d were given" % (5 + len(args)))
+
+ import warnings
+ warnings.warn(
+ "'count' is passed as positional argument",
+ DeprecationWarning, stacklevel=2
+ )
+
return _compile(pattern, flags).sub(repl, string, count)
+sub.__text_signature__ = '(pattern, repl, string, count=0, flags=0)'
-def subn(pattern, repl, string, count=0, flags=0):
+def subn(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel):
"""Return a 2-tuple containing (new_string, number).
new_string is the string obtained by replacing the leftmost
non-overlapping occurrences of the pattern in the source
@@ -193,9 +216,28 @@ def subn(pattern, repl, string, count=0, flags=0):
callable; if a string, backslash escapes in it are processed.
If it is a callable, it's passed the Match object and must
return a replacement string to be used."""
+ if args:
+ if count is not _zero_sentinel:
+ raise TypeError("subn() got multiple values for argument 'count'")
+ count, *args = args
+ if args:
+ if flags is not _zero_sentinel:
+ raise TypeError("subn() got multiple values for argument 'flags'")
+ flags, *args = args
+ if args:
+ raise TypeError("subn() takes from 3 to 5 positional arguments "
+ "but %d were given" % (5 + len(args)))
+
+ import warnings
+ warnings.warn(
+ "'count' is passed as positional argument",
+ DeprecationWarning, stacklevel=2
+ )
+
return _compile(pattern, flags).subn(repl, string, count)
+subn.__text_signature__ = '(pattern, repl, string, count=0, flags=0)'
-def split(pattern, string, maxsplit=0, flags=0):
+def split(pattern, string, *args, maxsplit=_zero_sentinel, flags=_zero_sentinel):
"""Split the source string by the occurrences of the pattern,
returning a list containing the resulting substrings. If
capturing parentheses are used in pattern, then the text of all
@@ -203,7 +245,26 @@ def split(pattern, string, maxsplit=0, flags=0):
list. If maxsplit is nonzero, at most maxsplit splits occur,
and the remainder of the string is returned as the final element
of the list."""
+ if args:
+ if maxsplit is not _zero_sentinel:
+ raise TypeError("split() got multiple values for argument 'maxsplit'")
+ maxsplit, *args = args
+ if args:
+ if flags is not _zero_sentinel:
+ raise TypeError("split() got multiple values for argument 'flags'")
+ flags, *args = args
+ if args:
+ raise TypeError("split() takes from 2 to 4 positional arguments "
+ "but %d were given" % (4 + len(args)))
+
+ import warnings
+ warnings.warn(
+ "'maxsplit' is passed as positional argument",
+ DeprecationWarning, stacklevel=2
+ )
+
return _compile(pattern, flags).split(string, maxsplit)
+split.__text_signature__ = '(pattern, string, maxsplit=0, flags=0)'
def findall(pattern, string, flags=0):
"""Return a list of all non-overlapping matches in the string.
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 042f97f..45bce19 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -127,8 +127,10 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
'9.3 -3 24x100y')
- self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
- '9.3 -3 23x99y')
+ with self.assertWarns(DeprecationWarning) as w:
+ self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
+ '9.3 -3 23x99y')
+ self.assertEqual(w.filename, __file__)
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', count=3),
'9.3 -3 23x99y')
@@ -235,9 +237,42 @@ class ReTests(unittest.TestCase):
def test_qualified_re_sub(self):
self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
- self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
+ with self.assertWarns(DeprecationWarning) as w:
+ self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
+ self.assertEqual(w.filename, __file__)
self.assertEqual(re.sub('a', 'b', 'aaaaa', count=1), 'baaaa')
+ with self.assertRaisesRegex(TypeError,
+ r"sub\(\) got multiple values for argument 'count'"):
+ re.sub('a', 'b', 'aaaaa', 1, count=1)
+ with self.assertRaisesRegex(TypeError,
+ r"sub\(\) got multiple values for argument 'flags'"):
+ re.sub('a', 'b', 'aaaaa', 1, 0, flags=0)
+ with self.assertRaisesRegex(TypeError,
+ r"sub\(\) takes from 3 to 5 positional arguments but 6 "
+ r"were given"):
+ re.sub('a', 'b', 'aaaaa', 1, 0, 0)
+
+ def test_misuse_flags(self):
+ with self.assertWarns(DeprecationWarning) as w:
+ result = re.sub('a', 'b', 'aaaaa', re.I)
+ self.assertEqual(result, re.sub('a', 'b', 'aaaaa', count=int(re.I)))
+ self.assertEqual(str(w.warning),
+ "'count' is passed as positional argument")
+ self.assertEqual(w.filename, __file__)
+ with self.assertWarns(DeprecationWarning) as w:
+ result = re.subn("b*", "x", "xyz", re.I)
+ self.assertEqual(result, re.subn("b*", "x", "xyz", count=int(re.I)))
+ self.assertEqual(str(w.warning),
+ "'count' is passed as positional argument")
+ self.assertEqual(w.filename, __file__)
+ with self.assertWarns(DeprecationWarning) as w:
+ result = re.split(":", ":a:b::c", re.I)
+ self.assertEqual(result, re.split(":", ":a:b::c", maxsplit=int(re.I)))
+ self.assertEqual(str(w.warning),
+ "'maxsplit' is passed as positional argument")
+ self.assertEqual(w.filename, __file__)
+
def test_bug_114660(self):
self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
'hello there')
@@ -344,9 +379,22 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
- self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
+ with self.assertWarns(DeprecationWarning) as w:
+ self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
+ self.assertEqual(w.filename, __file__)
self.assertEqual(re.subn("b*", "x", "xyz", count=2), ('xxxyz', 2))
+ with self.assertRaisesRegex(TypeError,
+ r"subn\(\) got multiple values for argument 'count'"):
+ re.subn('a', 'b', 'aaaaa', 1, count=1)
+ with self.assertRaisesRegex(TypeError,
+ r"subn\(\) got multiple values for argument 'flags'"):
+ re.subn('a', 'b', 'aaaaa', 1, 0, flags=0)
+ with self.assertRaisesRegex(TypeError,
+ r"subn\(\) takes from 3 to 5 positional arguments but 6 "
+ r"were given"):
+ re.subn('a', 'b', 'aaaaa', 1, 0, 0)
+
def test_re_split(self):
for string in ":a:b::c", S(":a:b::c"):
self.assertTypedEqual(re.split(":", string),
@@ -401,7 +449,9 @@ class ReTests(unittest.TestCase):
self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)
def test_qualified_re_split(self):
- self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
+ with self.assertWarns(DeprecationWarning) as w:
+ self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
+ self.assertEqual(w.filename, __file__)
self.assertEqual(re.split(":", ":a:b::c", maxsplit=2), ['', 'a', 'b::c'])
self.assertEqual(re.split(':', 'a:b:c:d', maxsplit=2), ['a', 'b', 'c:d'])
self.assertEqual(re.split("(:)", ":a:b::c", maxsplit=2),
@@ -411,6 +461,17 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.split("(:*)", ":a:b::c", maxsplit=2),
['', ':', '', '', 'a:b::c'])
+ with self.assertRaisesRegex(TypeError,
+ r"split\(\) got multiple values for argument 'maxsplit'"):
+ re.split(":", ":a:b::c", 2, maxsplit=2)
+ with self.assertRaisesRegex(TypeError,
+ r"split\(\) got multiple values for argument 'flags'"):
+ re.split(":", ":a:b::c", 2, 0, flags=0)
+ with self.assertRaisesRegex(TypeError,
+ r"split\(\) takes from 2 to 4 positional arguments but 5 "
+ r"were given"):
+ re.split(":", ":a:b::c", 2, 0, 0)
+
def test_re_findall(self):
self.assertEqual(re.findall(":+", "abc"), [])
for string in "a:b::c:::d", S("a:b::c:::d"):
diff --git a/Misc/NEWS.d/next/Library/2023-08-08-16-09-59.gh-issue-56166.WUMhYG.rst b/Misc/NEWS.d/next/Library/2023-08-08-16-09-59.gh-issue-56166.WUMhYG.rst
new file mode 100644
index 0000000..34d776a
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-08-08-16-09-59.gh-issue-56166.WUMhYG.rst
@@ -0,0 +1,3 @@
+Deprecate passing optional arguments *maxsplit*, *count* and *flags* in
+module-level functions :func:`re.split`, :func:`re.sub` and :func:`re.subn` as positional.
+They should only be passed by keyword.