summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2017-12-04 12:29:05 (GMT)
committer: GitHub <noreply@github.com> 2017-12-04 12:29:05 (GMT)
commit: 70d56fb52582d9d3f7c00860d6e90570c6259371 (patch)
tree: 61e54b78f19535bfcf41d521b98def725de63497 /Lib
parent: e69fbb6a560a02d0587b9075afd338a1e9073af0 (diff)
download: cpython-70d56fb52582d9d3f7c00860d6e90570c6259371.zip
cpython-70d56fb52582d9d3f7c00860d6e90570c6259371.tar.gz
cpython-70d56fb52582d9d3f7c00860d6e90570c6259371.tar.bz2
bpo-25054, bpo-1647489: Added support for splitting on zero-width patterns. (#4471)
Also fixed searching for patterns that could match an empty string.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/doctest.py 2
-rw-r--r-- Lib/test/test_re.py 44
2 files changed, 32 insertions, 14 deletions
diff --git a/Lib/doctest.py b/Lib/doctest.py
index 5e5bc21..c1d8a1d 100644
--- a/Lib/doctest.py
+++ b/Lib/doctest.py
@@ -1611,7 +1611,7 @@ class OutputChecker:
'', want)
# If a line in got contains only spaces, then remove the
# spaces.
- got = re.sub(r'(?m)^\s*?$', '', got)
+ got = re.sub(r'(?m)^[^\S\n]+$', '', got)
if got == want:
return True
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index ee87446..2344d71 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -331,21 +331,21 @@ class ReTests(unittest.TestCase):
['', 'a', '', '', 'c'])
for sep, expected in [
- (':*', ['', 'a', 'b', 'c']),
- ('(?::*)', ['', 'a', 'b', 'c']),
- ('(:*)', ['', ':', 'a', ':', 'b', '::', 'c']),
- ('(:)*', ['', ':', 'a', ':', 'b', ':', 'c']),
+ (':*', ['', 'a', 'b', 'c', '']),
+ ('(?::*)', ['', 'a', 'b', 'c', '']),
+ ('(:*)', ['', ':', 'a', ':', 'b', '::', 'c', '', '']),
+ ('(:)*', ['', ':', 'a', ':', 'b', ':', 'c', None, '']),
]:
- with self.subTest(sep=sep), self.assertWarns(FutureWarning):
+ with self.subTest(sep=sep):
self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)
for sep, expected in [
- ('', [':a:b::c']),
- (r'\b', [':a:b::c']),
- (r'(?=:)', [':a:b::c']),
- (r'(?<=:)', [':a:b::c']),
+ ('', ['', ':', 'a', ':', 'b', ':', ':', 'c', '']),
+ (r'\b', [':', 'a', ':', 'b', '::', 'c', '']),
+ (r'(?=:)', ['', ':a', ':b', ':', ':c']),
+ (r'(?<=:)', [':', 'a:', 'b:', ':', 'c']),
]:
- with self.subTest(sep=sep), self.assertRaises(ValueError):
+ with self.subTest(sep=sep):
self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)
def test_qualified_re_split(self):
@@ -356,9 +356,8 @@ class ReTests(unittest.TestCase):
['', ':', 'a', ':', 'b::c'])
self.assertEqual(re.split("(:+)", ":a:b::c", maxsplit=2),
['', ':', 'a', ':', 'b::c'])
- with self.assertWarns(FutureWarning):
- self.assertEqual(re.split("(:*)", ":a:b::c", maxsplit=2),
- ['', ':', 'a', ':', 'b::c'])
+ self.assertEqual(re.split("(:*)", ":a:b::c", maxsplit=2),
+ ['', ':', 'a', ':', 'b::c'])
def test_re_findall(self):
self.assertEqual(re.findall(":+", "abc"), [])
@@ -1751,6 +1750,25 @@ class ReTests(unittest.TestCase):
"span=(3, 5), match='bb'>" %
(type(second).__module__, type(second).__qualname__))
+ def test_zerowidth(self):
+ # Issues 852532, 1647489, 3262, 25054.
+ self.assertEqual(re.split(r"\b", "a::bc"), ['', 'a', '::', 'bc', ''])
+ self.assertEqual(re.split(r"\b|:+", "a::bc"), ['', 'a', '', 'bc', ''])
+ self.assertEqual(re.split(r"(?<!\w)(?=\w)|:+", "a::bc"), ['', 'a', 'bc'])
+ self.assertEqual(re.split(r"(?<=\w)(?!\w)|:+", "a::bc"), ['a', '', 'bc', ''])
+
+ self.assertEqual(re.sub(r"\b", "-", "a::bc"), '-a-::-bc-')
+ self.assertEqual(re.sub(r"\b|:+", "-", "a::bc"), '-a--bc-')
+ self.assertEqual(re.sub(r"(\b|:+)", r"[\1]", "a::bc"), '[]a[][::]bc[]')
+
+ self.assertEqual(re.findall(r"\b|:+", "a::bc"), ['', '', '::', '', ''])
+ self.assertEqual(re.findall(r"\b|\w+", "a::bc"),
+ ['', 'a', '', '', 'bc', ''])
+
+ self.assertEqual([m.span() for m in re.finditer(r"\b|:+", "a::bc")],
+ [(0, 0), (1, 1), (1, 3), (3, 3), (5, 5)])
+ self.assertEqual([m.span() for m in re.finditer(r"\b|\w+", "a::bc")],
+ [(0, 0), (0, 1), (1, 1), (3, 3), (3, 5), (5, 5)])
def test_bug_2537(self):
# issue 2537: empty submatches