diff options
author | Tim Peters <tim.peters@gmail.com> | 2022-03-21 17:49:43 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-03-21 17:49:43 (GMT) |
commit | 5c3201e146b251017cd77202015f47912ddcb980 (patch) | |
tree | ce73de73a7ab00cda60b49f83e7deed1f3bec044 | |
parent | 345b390ed69f36681dbc41187bc8f49cd9135b54 (diff) | |
download | cpython-5c3201e146b251017cd77202015f47912ddcb980.zip cpython-5c3201e146b251017cd77202015f47912ddcb980.tar.gz cpython-5c3201e146b251017cd77202015f47912ddcb980.tar.bz2 |
bpo-47080: Use atomic groups to simplify fnmatch (GH-32029)
Use re's new atomic groups to greatly simplify the construction of worst-case linear-time patterns.
-rw-r--r-- | Lib/fnmatch.py | 24 | ||||
-rw-r--r-- | Lib/test/test_fnmatch.py | 12 |
2 files changed, 7 insertions, 29 deletions
diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index 239c749..0f5a41a 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -16,12 +16,6 @@ import functools __all__ = ["filter", "fnmatch", "fnmatchcase", "translate"] -# Build a thread-safe incrementing counter to help create unique regexp group -# names across calls. -from itertools import count -_nextgroupnum = count().__next__ -del count - def fnmatch(name, pat): """Test whether FILENAME matches PATTERN. @@ -149,17 +143,10 @@ def translate(pat): # Now deal with STAR fixed STAR fixed ... # For an interior `STAR fixed` pairing, we want to do a minimal # .*? match followed by `fixed`, with no possibility of backtracking. - # We can't spell that directly, but can trick it into working by matching - # .*?fixed - # in a lookahead assertion, save the matched part in a group, then - # consume that group via a backreference. If the overall match fails, - # the lookahead assertion won't try alternatives. So the translation is: - # (?=(?P<name>.*?fixed))(?P=name) - # Group names are created as needed: g0, g1, g2, ... - # The numbers are obtained from _nextgroupnum() to ensure they're unique - # across calls and across threads. This is because people rely on the - # undocumented ability to join multiple translate() results together via - # "|" to build large regexps matching "one of many" shell patterns. + # Atomic groups ("(?>...)") allow us to spell that directly. + # Note: people rely on the undocumented ability to join multiple + # translate() results together via "|" to build large regexps matching + # "one of many" shell patterns. while i < n: assert inp[i] is STAR i += 1 @@ -176,8 +163,7 @@ def translate(pat): add(".*") add(fixed) else: - groupnum = _nextgroupnum() - add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})") + add(f"(?>.*?{fixed})") assert i == n res = "".join(res) return fr'(?s:{res})\Z' diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py index 10668e4..ca695d6 100644 --- a/Lib/test/test_fnmatch.py +++ b/Lib/test/test_fnmatch.py @@ -124,17 +124,9 @@ class TranslateTestCase(unittest.TestCase): self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z') # fancy translation to prevent exponential-time match failure t = translate('**a*a****a') - digits = re.findall(r'\d+', t) - self.assertEqual(len(digits), 4) - self.assertEqual(digits[0], digits[1]) - self.assertEqual(digits[2], digits[3]) - g1 = f"g{digits[0]}" # e.g., group name "g4" - g2 = f"g{digits[2]}" # e.g., group name "g5" - self.assertEqual(t, - fr'(?s:(?=(?P<{g1}>.*?a))(?P={g1})(?=(?P<{g2}>.*?a))(?P={g2}).*a)\Z') + self.assertEqual(t, r'(?s:(?>.*?a)(?>.*?a).*a)\Z') # and try pasting multiple translate results - it's an undocumented - # feature that this works; all the pain of generating unique group - # names across calls exists to support this + # feature that this works r1 = translate('**a**a**a*') r2 = translate('**b**b**b*') r3 = translate('*c*c*c*') |