From 7438e4b56fa6a34a021f11e1220331e841419b96 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 10 Oct 2014 11:06:31 +0300 Subject: Issue #1519638: Now unmatched groups are replaced with empty strings in re.sub() and re.subn(). --- Doc/library/re.rst | 8 ++++++++ Doc/whatsnew/3.5.rst | 3 +++ Lib/sre_parse.py | 8 +++----- Lib/test/test_re.py | 10 ++++++++-- Misc/NEWS | 5 ++++- 5 files changed, 26 insertions(+), 8 deletions(-) diff --git a/Doc/library/re.rst b/Doc/library/re.rst index edb2486..48e3006 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -701,6 +701,9 @@ form. .. versionchanged:: 3.1 Added the optional flags argument. + .. versionchanged:: 3.5 + Unmatched groups are replaced with an empty string. + .. function:: subn(pattern, repl, string, count=0, flags=0) @@ -710,6 +713,9 @@ form. .. versionchanged:: 3.1 Added the optional flags argument. + .. versionchanged:: 3.5 + Unmatched groups are replaced with an empty string. + .. function:: escape(string) @@ -885,6 +891,8 @@ Match objects support the following methods and attributes: (``\g<1>``, ``\g<name>``) are replaced by the contents of the corresponding group. + .. versionchanged:: 3.5 + Unmatched groups are replaced with an empty string. .. method:: match.group([group1, ...]) diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst index feca241..319284a 100644 --- a/Doc/whatsnew/3.5.rst +++ b/Doc/whatsnew/3.5.rst @@ -223,6 +223,9 @@ re * Number of capturing groups in regular expression is no longer limited by 100. (Contributed by Serhiy Storchaka in :issue:`22437`.) +* Now unmatched groups are replaced with empty strings in :func:`re.sub` + and :func:`re.subn`. (Contributed by Serhiy Storchaka in :issue:`1519638`.)
+ shutil ------ diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index b9a1852..063d1b7 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -880,14 +880,12 @@ def parse_template(source, pattern): def expand_template(template, match): g = match.group - sep = match.string[:0] + empty = match.string[:0] groups, literals = template literals = literals[:] try: for index, group in groups: - literals[index] = s = g(group) - if s is None: - raise error("unmatched group") + literals[index] = g(group) or empty except IndexError: raise error("invalid group reference") - return sep.join(literals) + return empty.join(literals) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index e5ad6cb..0e4fa88 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -225,9 +225,11 @@ class ReTests(unittest.TestCase): self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx') self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx') self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx') + self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<2>', 'xx') + self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\2', 'xx') self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx') - self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx') - self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx') + self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '') + self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '') self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx') # New valid/invalid identifiers in Python 3 self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx') @@ -439,6 +441,10 @@ class ReTests(unittest.TestCase): "first second") .expand(r"\2 \1 \g<second> \g<first>"), "second first second first") + self.assertEqual(re.match("(?P<first>first)|(?P<second>second)", + "first") + .expand(r"\2 \g<second>"), + " ") def test_repeat_minmax(self): self.assertIsNone(re.match("^(\w){1}$", "abc")) diff --git a/Misc/NEWS b/Misc/NEWS index 21ee7b2..c3932d7 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -166,7 +166,10
@@ Core and Builtins Library ------- -- Issue $18615: sndhdr.what/whathdr now return a namedtuple. +- Issue #1519638: Now unmatched groups are replaced with empty strings in re.sub() + and re.subn(). + +- Issue #18615: sndhdr.what/whathdr now return a namedtuple. - Issue #22462: Fix pyexpat's creation of a dummy frame to make it appear in exception tracebacks. -- cgit v0.12