From 882cb79afa2cb11b180ef699fd5cf038e72f6c85 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 16 Aug 2023 23:35:35 +0300 Subject: gh-56166: Deprecate passing confusing positional arguments in re functions (#107778) Deprecate passing optional arguments maxsplit, count and flags in module-level functions re.split(), re.sub() and re.subn() as positional. They should only be passed by keyword. --- Doc/library/re.rst | 24 ++++---- Doc/whatsnew/3.13.rst | 7 +++ Lib/re/__init__.py | 67 +++++++++++++++++++- Lib/test/test_re.py | 71 ++++++++++++++++++++-- .../2023-08-08-16-09-59.gh-issue-56166.WUMhYG.rst | 3 + 5 files changed, 153 insertions(+), 19 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-08-08-16-09-59.gh-issue-56166.WUMhYG.rst diff --git a/Doc/library/re.rst b/Doc/library/re.rst index 3f03f03..ab201e2 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -898,7 +898,7 @@ Functions ['Words', 'words', 'words', ''] >>> re.split(r'(\W+)', 'Words, words, words.') ['Words', ', ', 'words', ', ', 'words', '.', ''] - >>> re.split(r'\W+', 'Words, words, words.', 1) + >>> re.split(r'\W+', 'Words, words, words.', maxsplit=1) ['Words', 'words, words.'] >>> re.split('[a-f]+', '0a3B9', flags=re.IGNORECASE) ['0', '3', '9'] @@ -929,6 +929,11 @@ Functions .. versionchanged:: 3.7 Added support of splitting on a pattern that could match an empty string. + .. deprecated:: 3.13 + Passing *maxsplit* and *flags* as positional arguments is deprecated. + In future Python versions they will be + :ref:`keyword-only parameters `. + .. function:: findall(pattern, string, flags=0) @@ -1027,8 +1032,6 @@ Functions .. versionchanged:: 3.7 Unknown escapes in *repl* consisting of ``'\'`` and an ASCII letter now are errors. - - .. versionchanged:: 3.7 Empty matches for the pattern are replaced when adjacent to a previous non-empty match. @@ -1037,18 +1040,17 @@ Functions In :class:`bytes` replacement strings, group *name* can only contain bytes in the ASCII range (``b'\x00'``-``b'\x7f'``). + .. deprecated:: 3.13 + Passing *count* and *flags* as positional arguments is deprecated. + In future Python versions they will be + :ref:`keyword-only parameters `. + .. function:: subn(pattern, repl, string, count=0, flags=0) Perform the same operation as :func:`sub`, but return a tuple ``(new_string, number_of_subs_made)``. - .. versionchanged:: 3.1 - Added the optional flags argument. - - .. versionchanged:: 3.5 - Unmatched groups are replaced with an empty string. - .. function:: escape(pattern) @@ -1656,7 +1658,7 @@ because the address has spaces, our splitting pattern, in it: .. doctest:: :options: +NORMALIZE_WHITESPACE - >>> [re.split(":? ", entry, 3) for entry in entries] + >>> [re.split(":? ", entry, maxsplit=3) for entry in entries] [['Ross', 'McFluff', '834.345.1254', '155 Elm Street'], ['Ronald', 'Heathmore', '892.345.3428', '436 Finley Avenue'], ['Frank', 'Burger', '925.541.7625', '662 South Dogwood Way'], @@ -1669,7 +1671,7 @@ house number from the street name: .. doctest:: :options: +NORMALIZE_WHITESPACE - >>> [re.split(":? ", entry, 4) for entry in entries] + >>> [re.split(":? ", entry, maxsplit=4) for entry in entries] [['Ross', 'McFluff', '834.345.1254', '155', 'Elm Street'], ['Ronald', 'Heathmore', '892.345.3428', '436', 'Finley Avenue'], ['Frank', 'Burger', '925.541.7625', '662', 'South Dogwood Way'], diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 84ffd84..519dee5 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -832,6 +832,13 @@ Porting to Python 3.13 Deprecated ---------- +* Passing optional arguments *maxsplit*, *count* and *flags* in module-level + functions :func:`re.split`, :func:`re.sub` and :func:`re.subn` as positional + arguments is now deprecated. + In future Python versions these parameters will be + :ref:`keyword-only `. + (Contributed by Serhiy Storchaka in :gh:`56166`.) + * Deprecate the old ``Py_UNICODE`` and ``PY_UNICODE_TYPE`` types: use directly the :c:type:`wchar_t` type instead. Since Python 3.3, ``Py_UNICODE`` and ``PY_UNICODE_TYPE`` are just aliases to :c:type:`wchar_t`. diff --git a/Lib/re/__init__.py b/Lib/re/__init__.py index d6fccd5..428d1b0 100644 --- a/Lib/re/__init__.py +++ b/Lib/re/__init__.py @@ -175,16 +175,39 @@ def search(pattern, string, flags=0): a Match object, or None if no match was found.""" return _compile(pattern, flags).search(string) -def sub(pattern, repl, string, count=0, flags=0): +class _ZeroSentinel(int): + pass +_zero_sentinel = _ZeroSentinel() + +def sub(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel): """Return the string obtained by replacing the leftmost non-overlapping occurrences of the pattern in string by the replacement repl. repl can be either a string or a callable; if a string, backslash escapes in it are processed. If it is a callable, it's passed the Match object and must return a replacement string to be used.""" + if args: + if count is not _zero_sentinel: + raise TypeError("sub() got multiple values for argument 'count'") + count, *args = args + if args: + if flags is not _zero_sentinel: + raise TypeError("sub() got multiple values for argument 'flags'") + flags, *args = args + if args: + raise TypeError("sub() takes from 3 to 5 positional arguments " + "but %d were given" % (5 + len(args))) + + import warnings + warnings.warn( + "'count' is passed as positional argument", + DeprecationWarning, stacklevel=2 + ) + return _compile(pattern, flags).sub(repl, string, count) +sub.__text_signature__ = '(pattern, repl, string, count=0, flags=0)' -def subn(pattern, repl, string, count=0, flags=0): +def subn(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel): """Return a 2-tuple containing (new_string, number). new_string is the string obtained by replacing the leftmost non-overlapping occurrences of the pattern in the source @@ -193,9 +216,28 @@ def subn(pattern, repl, string, count=0, flags=0): callable; if a string, backslash escapes in it are processed. If it is a callable, it's passed the Match object and must return a replacement string to be used.""" + if args: + if count is not _zero_sentinel: + raise TypeError("subn() got multiple values for argument 'count'") + count, *args = args + if args: + if flags is not _zero_sentinel: + raise TypeError("subn() got multiple values for argument 'flags'") + flags, *args = args + if args: + raise TypeError("subn() takes from 3 to 5 positional arguments " + "but %d were given" % (5 + len(args))) + + import warnings + warnings.warn( + "'count' is passed as positional argument", + DeprecationWarning, stacklevel=2 + ) + return _compile(pattern, flags).subn(repl, string, count) +subn.__text_signature__ = '(pattern, repl, string, count=0, flags=0)' -def split(pattern, string, maxsplit=0, flags=0): +def split(pattern, string, *args, maxsplit=_zero_sentinel, flags=_zero_sentinel): """Split the source string by the occurrences of the pattern, returning a list containing the resulting substrings. If capturing parentheses are used in pattern, then the text of all @@ -203,7 +245,26 @@ def split(pattern, string, maxsplit=0, flags=0): list. If maxsplit is nonzero, at most maxsplit splits occur, and the remainder of the string is returned as the final element of the list.""" + if args: + if maxsplit is not _zero_sentinel: + raise TypeError("split() got multiple values for argument 'maxsplit'") + maxsplit, *args = args + if args: + if flags is not _zero_sentinel: + raise TypeError("split() got multiple values for argument 'flags'") + flags, *args = args + if args: + raise TypeError("split() takes from 2 to 4 positional arguments " + "but %d were given" % (4 + len(args))) + + import warnings + warnings.warn( + "'maxsplit' is passed as positional argument", + DeprecationWarning, stacklevel=2 + ) + return _compile(pattern, flags).split(string, maxsplit) +split.__text_signature__ = '(pattern, string, maxsplit=0, flags=0)' def findall(pattern, string, flags=0): """Return a list of all non-overlapping matches in the string. diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 042f97f..45bce19 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -127,8 +127,10 @@ class ReTests(unittest.TestCase): self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x') self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'), '9.3 -3 24x100y') - self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3), - '9.3 -3 23x99y') + with self.assertWarns(DeprecationWarning) as w: + self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3), + '9.3 -3 23x99y') + self.assertEqual(w.filename, __file__) self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', count=3), '9.3 -3 23x99y') @@ -235,9 +237,42 @@ class ReTests(unittest.TestCase): def test_qualified_re_sub(self): self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb') - self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa') + with self.assertWarns(DeprecationWarning) as w: + self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa') + self.assertEqual(w.filename, __file__) self.assertEqual(re.sub('a', 'b', 'aaaaa', count=1), 'baaaa') + with self.assertRaisesRegex(TypeError, + r"sub\(\) got multiple values for argument 'count'"): + re.sub('a', 'b', 'aaaaa', 1, count=1) + with self.assertRaisesRegex(TypeError, + r"sub\(\) got multiple values for argument 'flags'"): + re.sub('a', 'b', 'aaaaa', 1, 0, flags=0) + with self.assertRaisesRegex(TypeError, + r"sub\(\) takes from 3 to 5 positional arguments but 6 " + r"were given"): + re.sub('a', 'b', 'aaaaa', 1, 0, 0) + + def test_misuse_flags(self): + with self.assertWarns(DeprecationWarning) as w: + result = re.sub('a', 'b', 'aaaaa', re.I) + self.assertEqual(result, re.sub('a', 'b', 'aaaaa', count=int(re.I))) + self.assertEqual(str(w.warning), + "'count' is passed as positional argument") + self.assertEqual(w.filename, __file__) + with self.assertWarns(DeprecationWarning) as w: + result = re.subn("b*", "x", "xyz", re.I) + self.assertEqual(result, re.subn("b*", "x", "xyz", count=int(re.I))) + self.assertEqual(str(w.warning), + "'count' is passed as positional argument") + self.assertEqual(w.filename, __file__) + with self.assertWarns(DeprecationWarning) as w: + result = re.split(":", ":a:b::c", re.I) + self.assertEqual(result, re.split(":", ":a:b::c", maxsplit=int(re.I))) + self.assertEqual(str(w.warning), + "'maxsplit' is passed as positional argument") + self.assertEqual(w.filename, __file__) + def test_bug_114660(self): self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'), 'hello there') @@ -344,9 +379,22 @@ class ReTests(unittest.TestCase): self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1)) self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0)) self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4)) - self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2)) + with self.assertWarns(DeprecationWarning) as w: + self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2)) + self.assertEqual(w.filename, __file__) self.assertEqual(re.subn("b*", "x", "xyz", count=2), ('xxxyz', 2)) + with self.assertRaisesRegex(TypeError, + r"subn\(\) got multiple values for argument 'count'"): + re.subn('a', 'b', 'aaaaa', 1, count=1) + with self.assertRaisesRegex(TypeError, + r"subn\(\) got multiple values for argument 'flags'"): + re.subn('a', 'b', 'aaaaa', 1, 0, flags=0) + with self.assertRaisesRegex(TypeError, + r"subn\(\) takes from 3 to 5 positional arguments but 6 " + r"were given"): + re.subn('a', 'b', 'aaaaa', 1, 0, 0) + def test_re_split(self): for string in ":a:b::c", S(":a:b::c"): self.assertTypedEqual(re.split(":", string), @@ -401,7 +449,9 @@ class ReTests(unittest.TestCase): self.assertTypedEqual(re.split(sep, ':a:b::c'), expected) def test_qualified_re_split(self): - self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c']) + with self.assertWarns(DeprecationWarning) as w: + self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c']) + self.assertEqual(w.filename, __file__) self.assertEqual(re.split(":", ":a:b::c", maxsplit=2), ['', 'a', 'b::c']) self.assertEqual(re.split(':', 'a:b:c:d', maxsplit=2), ['a', 'b', 'c:d']) self.assertEqual(re.split("(:)", ":a:b::c", maxsplit=2), @@ -411,6 +461,17 @@ class ReTests(unittest.TestCase): self.assertEqual(re.split("(:*)", ":a:b::c", maxsplit=2), ['', ':', '', '', 'a:b::c']) + with self.assertRaisesRegex(TypeError, + r"split\(\) got multiple values for argument 'maxsplit'"): + re.split(":", ":a:b::c", 2, maxsplit=2) + with self.assertRaisesRegex(TypeError, + r"split\(\) got multiple values for argument 'flags'"): + re.split(":", ":a:b::c", 2, 0, flags=0) + with self.assertRaisesRegex(TypeError, + r"split\(\) takes from 2 to 4 positional arguments but 5 " + r"were given"): + re.split(":", ":a:b::c", 2, 0, 0) + def test_re_findall(self): self.assertEqual(re.findall(":+", "abc"), []) for string in "a:b::c:::d", S("a:b::c:::d"): diff --git a/Misc/NEWS.d/next/Library/2023-08-08-16-09-59.gh-issue-56166.WUMhYG.rst b/Misc/NEWS.d/next/Library/2023-08-08-16-09-59.gh-issue-56166.WUMhYG.rst new file mode 100644 index 0000000..34d776a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-08-08-16-09-59.gh-issue-56166.WUMhYG.rst @@ -0,0 +1,3 @@ +Deprecate passing optional arguments *maxsplit*, *count* and *flags* in +module-level functions :func:`re.split`, :func:`re.sub` and :func:`re.subn` as positional. +They should only be passed by keyword. -- cgit v0.12