summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Lib/fnmatch.py90
-rw-r--r--Lib/glob.py2
-rw-r--r--Lib/test/test_fnmatch.py69
-rw-r--r--Misc/NEWS.d/next/Library/2024-07-25-18-06-51.gh-issue-122288.-_xxOR.rst2
4 files changed, 114 insertions, 49 deletions
diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index 73acb1f..865baea 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -77,24 +77,30 @@ def translate(pat):
There is no way to quote meta-characters.
"""
- STAR = object()
- parts = _translate(pat, STAR, '.')
- return _join_translated_parts(parts, STAR)
+ parts, star_indices = _translate(pat, '*', '.')
+ return _join_translated_parts(parts, star_indices)
+_re_setops_sub = re.compile(r'([&~|])').sub
+_re_escape = functools.lru_cache(maxsize=512)(re.escape)
-def _translate(pat, STAR, QUESTION_MARK):
+def _translate(pat, star, question_mark):
res = []
add = res.append
+ star_indices = []
+
i, n = 0, len(pat)
while i < n:
c = pat[i]
i = i+1
if c == '*':
+ # store the position of the wildcard
+ star_indices.append(len(res))
+ add(star)
# compress consecutive `*` into one
- if (not res) or res[-1] is not STAR:
- add(STAR)
+ while i < n and pat[i] == '*':
+ i += 1
elif c == '?':
- add(QUESTION_MARK)
+ add(question_mark)
elif c == '[':
j = i
if j < n and pat[j] == '!':
@@ -133,8 +139,6 @@ def _translate(pat, STAR, QUESTION_MARK):
# Hyphens that create ranges shouldn't be escaped.
stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
for s in chunks)
- # Escape set operations (&&, ~~ and ||).
- stuff = re.sub(r'([&~|])', r'\\\1', stuff)
i = j+1
if not stuff:
# Empty range: never match.
@@ -143,50 +147,40 @@ def _translate(pat, STAR, QUESTION_MARK):
# Negated empty range: match any character.
add('.')
else:
+ # Escape set operations (&&, ~~ and ||).
+ stuff = _re_setops_sub(r'\\\1', stuff)
if stuff[0] == '!':
stuff = '^' + stuff[1:]
elif stuff[0] in ('^', '['):
stuff = '\\' + stuff
add(f'[{stuff}]')
else:
- add(re.escape(c))
- assert i == n
- return res
-
-
-def _join_translated_parts(inp, STAR):
- # Deal with STARs.
- res = []
- add = res.append
- i, n = 0, len(inp)
- # Fixed pieces at the start?
- while i < n and inp[i] is not STAR:
- add(inp[i])
- i += 1
- # Now deal with STAR fixed STAR fixed ...
- # For an interior `STAR fixed` pairing, we want to do a minimal
- # .*? match followed by `fixed`, with no possibility of backtracking.
- # Atomic groups ("(?>...)") allow us to spell that directly.
- # Note: people rely on the undocumented ability to join multiple
- # translate() results together via "|" to build large regexps matching
- # "one of many" shell patterns.
- while i < n:
- assert inp[i] is STAR
- i += 1
- if i == n:
- add(".*")
- break
- assert inp[i] is not STAR
- fixed = []
- while i < n and inp[i] is not STAR:
- fixed.append(inp[i])
- i += 1
- fixed = "".join(fixed)
- if i == n:
- add(".*")
- add(fixed)
- else:
- add(f"(?>.*?{fixed})")
+ add(_re_escape(c))
assert i == n
- res = "".join(res)
+ return res, star_indices
+
+
+def _join_translated_parts(parts, star_indices):
+ if not star_indices:
+ return fr'(?s:{"".join(parts)})\Z'
+ iter_star_indices = iter(star_indices)
+ j = next(iter_star_indices)
+ buffer = parts[:j] # fixed pieces at the start
+ append, extend = buffer.append, buffer.extend
+ i = j + 1
+ for j in iter_star_indices:
+ # Now deal with STAR fixed STAR fixed ...
+ # For an interior `STAR fixed` pairing, we want to do a minimal
+ # .*? match followed by `fixed`, with no possibility of backtracking.
+ # Atomic groups ("(?>...)") allow us to spell that directly.
+ # Note: people rely on the undocumented ability to join multiple
+ # translate() results together via "|" to build large regexps matching
+ # "one of many" shell patterns.
+ append('(?>.*?')
+ extend(parts[i:j])
+ append(')')
+ i = j + 1
+ append('.*')
+ extend(parts[i:])
+ res = ''.join(buffer)
return fr'(?s:{res})\Z'
diff --git a/Lib/glob.py b/Lib/glob.py
index ce9b369..690ab1b 100644
--- a/Lib/glob.py
+++ b/Lib/glob.py
@@ -312,7 +312,7 @@ def translate(pat, *, recursive=False, include_hidden=False, seps=None):
if part:
if not include_hidden and part[0] in '*?':
results.append(r'(?!\.)')
- results.extend(fnmatch._translate(part, f'{not_sep}*', not_sep))
+ results.extend(fnmatch._translate(part, f'{not_sep}*', not_sep)[0])
if idx < last_part_idx:
results.append(any_sep)
res = ''.join(results)
diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index 10ed496..9f360e1 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -250,6 +250,75 @@ class TranslateTestCase(unittest.TestCase):
self.assertTrue(re.match(fatre, 'cbabcaxc'))
self.assertFalse(re.match(fatre, 'dabccbad'))
+ def test_translate_wildcards(self):
+ for pattern, expect in [
+ ('ab*', r'(?s:ab.*)\Z'),
+ ('ab*cd', r'(?s:ab.*cd)\Z'),
+ ('ab*cd*', r'(?s:ab(?>.*?cd).*)\Z'),
+ ('ab*cd*12', r'(?s:ab(?>.*?cd).*12)\Z'),
+ ('ab*cd*12*', r'(?s:ab(?>.*?cd)(?>.*?12).*)\Z'),
+ ('ab*cd*12*34', r'(?s:ab(?>.*?cd)(?>.*?12).*34)\Z'),
+ ('ab*cd*12*34*', r'(?s:ab(?>.*?cd)(?>.*?12)(?>.*?34).*)\Z'),
+ ]:
+ with self.subTest(pattern):
+ translated = translate(pattern)
+ self.assertEqual(translated, expect, pattern)
+
+ for pattern, expect in [
+ ('*ab', r'(?s:.*ab)\Z'),
+ ('*ab*', r'(?s:(?>.*?ab).*)\Z'),
+ ('*ab*cd', r'(?s:(?>.*?ab).*cd)\Z'),
+ ('*ab*cd*', r'(?s:(?>.*?ab)(?>.*?cd).*)\Z'),
+ ('*ab*cd*12', r'(?s:(?>.*?ab)(?>.*?cd).*12)\Z'),
+ ('*ab*cd*12*', r'(?s:(?>.*?ab)(?>.*?cd)(?>.*?12).*)\Z'),
+ ('*ab*cd*12*34', r'(?s:(?>.*?ab)(?>.*?cd)(?>.*?12).*34)\Z'),
+ ('*ab*cd*12*34*', r'(?s:(?>.*?ab)(?>.*?cd)(?>.*?12)(?>.*?34).*)\Z'),
+ ]:
+ with self.subTest(pattern):
+ translated = translate(pattern)
+ self.assertEqual(translated, expect, pattern)
+
+ def test_translate_expressions(self):
+ for pattern, expect in [
+ ('[', r'(?s:\[)\Z'),
+ ('[!', r'(?s:\[!)\Z'),
+ ('[]', r'(?s:\[\])\Z'),
+ ('[abc', r'(?s:\[abc)\Z'),
+ ('[!abc', r'(?s:\[!abc)\Z'),
+ ('[abc]', r'(?s:[abc])\Z'),
+ ('[!abc]', r'(?s:[^abc])\Z'),
+ ('[!abc][!def]', r'(?s:[^abc][^def])\Z'),
+ # with [[
+ ('[[', r'(?s:\[\[)\Z'),
+ ('[[a', r'(?s:\[\[a)\Z'),
+ ('[[]', r'(?s:[\[])\Z'),
+ ('[[]a', r'(?s:[\[]a)\Z'),
+ ('[[]]', r'(?s:[\[]\])\Z'),
+ ('[[]a]', r'(?s:[\[]a\])\Z'),
+ ('[[a]', r'(?s:[\[a])\Z'),
+ ('[[a]]', r'(?s:[\[a]\])\Z'),
+ ('[[a]b', r'(?s:[\[a]b)\Z'),
+ # backslashes
+ ('[\\', r'(?s:\[\\)\Z'),
+ (r'[\]', r'(?s:[\\])\Z'),
+ (r'[\\]', r'(?s:[\\\\])\Z'),
+ ]:
+ with self.subTest(pattern):
+ translated = translate(pattern)
+ self.assertEqual(translated, expect, pattern)
+
+ def test_star_indices_locations(self):
+ from fnmatch import _translate
+
+ blocks = ['a^b', '***', '?', '?', '[a-z]', '[1-9]', '*', '++', '[[a']
+ parts, star_indices = _translate(''.join(blocks), '*', '.')
+ expect_parts = ['a', r'\^', 'b', '*',
+ '.', '.', '[a-z]', '[1-9]', '*',
+ r'\+', r'\+', r'\[', r'\[', 'a']
+ self.assertListEqual(parts, expect_parts)
+ self.assertListEqual(star_indices, [3, 8])
+
+
class FilterTestCase(unittest.TestCase):
def test_filter(self):
diff --git a/Misc/NEWS.d/next/Library/2024-07-25-18-06-51.gh-issue-122288.-_xxOR.rst b/Misc/NEWS.d/next/Library/2024-07-25-18-06-51.gh-issue-122288.-_xxOR.rst
new file mode 100644
index 0000000..26a18af
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-07-25-18-06-51.gh-issue-122288.-_xxOR.rst
@@ -0,0 +1,2 @@
+Improve the performances of :func:`fnmatch.translate` by a factor 1.7. Patch
+by Bénédikt Tran.