diff options
Diffstat (limited to 'Lib/fnmatch.py')
-rw-r--r-- | Lib/fnmatch.py | 90 |
1 files changed, 42 insertions, 48 deletions
diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index 73acb1f..865baea 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -77,24 +77,30 @@ def translate(pat): There is no way to quote meta-characters. """ - STAR = object() - parts = _translate(pat, STAR, '.') - return _join_translated_parts(parts, STAR) + parts, star_indices = _translate(pat, '*', '.') + return _join_translated_parts(parts, star_indices) +_re_setops_sub = re.compile(r'([&~|])').sub +_re_escape = functools.lru_cache(maxsize=512)(re.escape) -def _translate(pat, STAR, QUESTION_MARK): +def _translate(pat, star, question_mark): res = [] add = res.append + star_indices = [] + i, n = 0, len(pat) while i < n: c = pat[i] i = i+1 if c == '*': + # store the position of the wildcard + star_indices.append(len(res)) + add(star) # compress consecutive `*` into one - if (not res) or res[-1] is not STAR: - add(STAR) + while i < n and pat[i] == '*': + i += 1 elif c == '?': - add(QUESTION_MARK) + add(question_mark) elif c == '[': j = i if j < n and pat[j] == '!': @@ -133,8 +139,6 @@ def _translate(pat, STAR, QUESTION_MARK): # Hyphens that create ranges shouldn't be escaped. stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-') for s in chunks) - # Escape set operations (&&, ~~ and ||). - stuff = re.sub(r'([&~|])', r'\\\1', stuff) i = j+1 if not stuff: # Empty range: never match. @@ -143,50 +147,40 @@ def _translate(pat, STAR, QUESTION_MARK): # Negated empty range: match any character. add('.') else: + # Escape set operations (&&, ~~ and ||). + stuff = _re_setops_sub(r'\\\1', stuff) if stuff[0] == '!': stuff = '^' + stuff[1:] elif stuff[0] in ('^', '['): stuff = '\\' + stuff add(f'[{stuff}]') else: - add(re.escape(c)) - assert i == n - return res - - -def _join_translated_parts(inp, STAR): - # Deal with STARs. - res = [] - add = res.append - i, n = 0, len(inp) - # Fixed pieces at the start? - while i < n and inp[i] is not STAR: - add(inp[i]) - i += 1 - # Now deal with STAR fixed STAR fixed ... - # For an interior `STAR fixed` pairing, we want to do a minimal - # .*? match followed by `fixed`, with no possibility of backtracking. - # Atomic groups ("(?>...)") allow us to spell that directly. - # Note: people rely on the undocumented ability to join multiple - # translate() results together via "|" to build large regexps matching - # "one of many" shell patterns. - while i < n: - assert inp[i] is STAR - i += 1 - if i == n: - add(".*") - break - assert inp[i] is not STAR - fixed = [] - while i < n and inp[i] is not STAR: - fixed.append(inp[i]) - i += 1 - fixed = "".join(fixed) - if i == n: - add(".*") - add(fixed) - else: - add(f"(?>.*?{fixed})") + add(_re_escape(c)) assert i == n - res = "".join(res) + return res, star_indices + + +def _join_translated_parts(parts, star_indices): + if not star_indices: + return fr'(?s:{"".join(parts)})\Z' + iter_star_indices = iter(star_indices) + j = next(iter_star_indices) + buffer = parts[:j] # fixed pieces at the start + append, extend = buffer.append, buffer.extend + i = j + 1 + for j in iter_star_indices: + # Now deal with STAR fixed STAR fixed ... + # For an interior `STAR fixed` pairing, we want to do a minimal + # .*? match followed by `fixed`, with no possibility of backtracking. + # Atomic groups ("(?>...)") allow us to spell that directly. + # Note: people rely on the undocumented ability to join multiple + # translate() results together via "|" to build large regexps matching + # "one of many" shell patterns. + append('(?>.*?') + extend(parts[i:j]) + append(')') + i = j + 1 + append('.*') + extend(parts[i:]) + res = ''.join(buffer) return fr'(?s:{res})\Z' |