summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tim Peters <tim.peters@gmail.com> 2020-05-12 02:19:20 (GMT)
committer GitHub <noreply@github.com> 2020-05-12 02:19:20 (GMT)
commit b1b4c790e7d3b5f4244450aefe3d8f01710c13f7 (patch)
tree d0f03bf47219bdec01e64b56aed96df96ab427b1
parent d0919f0d6bb757b6bcfd7b2e15656d318c9d5cd9 (diff)
download cpython-b1b4c790e7d3b5f4244450aefe3d8f01710c13f7.zip
cpython-b1b4c790e7d3b5f4244450aefe3d8f01710c13f7.tar.gz
cpython-b1b4c790e7d3b5f4244450aefe3d8f01710c13f7.tar.bz2
bpo-40480: restore ability to join fnmatch.translate() results (GH-20049)
In translate(), generate unique group names across calls. This restores the undocumented ability to get a valid regexp by joining multiple translate() results via `|`.
-rw-r--r-- Lib/fnmatch.py 17
-rw-r--r-- Lib/test/test_fnmatch.py 24
2 files changed, 34 insertions, 7 deletions
diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index d7d915d..0eb1802 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -16,6 +16,12 @@ import functools
__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
+# Build a thread-safe incrementing counter to help create unique regexp group
+# names across calls.
+from itertools import count
+_nextgroupnum = count().__next__
+del count
+
def fnmatch(name, pat):
"""Test whether FILENAME matches PATTERN.
@@ -148,9 +154,12 @@ def translate(pat):
# in a lookahead assertion, save the matched part in a group, then
# consume that group via a backreference. If the overall match fails,
# the lookahead assertion won't try alternatives. So the translation is:
- # (?=(P<name>.*?fixed))(?P=name)
- # Group names are created as needed: g1, g2, g3, ...
- groupnum = 0
+ # (?=(?P<name>.*?fixed))(?P=name)
+ # Group names are created as needed: g0, g1, g2, ...
+ # The numbers are obtained from _nextgroupnum() to ensure they're unique
+ # across calls and across threads. This is because people rely on the
+ # undocumented ability to join multiple translate() results together via
+ # "|" to build large regexps matching "one of many" shell patterns.
while i < n:
assert inp[i] is STAR
i += 1
@@ -167,7 +176,7 @@ def translate(pat):
add(".*")
add(fixed)
else:
- groupnum += 1
+ groupnum = _nextgroupnum()
add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
assert i == n
res = "".join(res)
diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index 4c17306..10668e4 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -106,6 +106,7 @@ class FnmatchTestCase(unittest.TestCase):
class TranslateTestCase(unittest.TestCase):
def test_translate(self):
+ import re
self.assertEqual(translate('*'), r'(?s:.*)\Z')
self.assertEqual(translate('?'), r'(?s:.)\Z')
self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z')
@@ -122,9 +123,26 @@ class TranslateTestCase(unittest.TestCase):
self.assertEqual(translate('*********A'), r'(?s:.*A)\Z')
self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z')
# fancy translation to prevent exponential-time match failure
- self.assertEqual(translate('**a*a****a'),
- r'(?s:(?=(?P<g1>.*?a))(?P=g1)(?=(?P<g2>.*?a))(?P=g2).*a)\Z')
-
+ t = translate('**a*a****a')
+ digits = re.findall(r'\d+', t)
+ self.assertEqual(len(digits), 4)
+ self.assertEqual(digits[0], digits[1])
+ self.assertEqual(digits[2], digits[3])
+ g1 = f"g{digits[0]}" # e.g., group name "g4"
+ g2 = f"g{digits[2]}" # e.g., group name "g5"
+ self.assertEqual(t,
+ fr'(?s:(?=(?P<{g1}>.*?a))(?P={g1})(?=(?P<{g2}>.*?a))(?P={g2}).*a)\Z')
+ # and try pasting multiple translate results - it's an undocumented
+ # feature that this works; all the pain of generating unique group
+ # names across calls exists to support this
+ r1 = translate('**a**a**a*')
+ r2 = translate('**b**b**b*')
+ r3 = translate('*c*c*c*')
+ fatre = "|".join([r1, r2, r3])
+ self.assertTrue(re.match(fatre, 'abaccad'))
+ self.assertTrue(re.match(fatre, 'abxbcab'))
+ self.assertTrue(re.match(fatre, 'cbabcaxc'))
+ self.assertFalse(re.match(fatre, 'dabccbad'))
class FilterTestCase(unittest.TestCase):