summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMiss Islington (bot) <31488909+miss-islington@users.noreply.github.com>2022-06-05 09:39:03 (GMT)
committerGitHub <noreply@github.com>2022-06-05 09:39:03 (GMT)
commit2f8aae38b9ce19dfd00356927a68cd00366331bc (patch)
treef0f466b433886157170e7403c18a197b00f4e1f7
parent60adc4b92a8a6fe115a023c8f639a6de4730fac1 (diff)
downloadcpython-2f8aae38b9ce19dfd00356927a68cd00366331bc.zip
cpython-2f8aae38b9ce19dfd00356927a68cd00366331bc.tar.gz
cpython-2f8aae38b9ce19dfd00356927a68cd00366331bc.tar.bz2
gh-89973: Fix re.error in the fnmatch module. (GH-93072)
Character ranges with upper bound less than lower bound (e.g. [c-a]) are now interpreted as empty ranges, for compatibility with other glob pattern implementations. Previously such a pattern raised re.error. (cherry picked from commit 0902c3d8edf7ef67972dd95f6a21670f5d1a4251) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
-rw-r--r--Lib/fnmatch.py30
-rw-r--r--Lib/test/test_fnmatch.py114
-rw-r--r--Misc/NEWS.d/next/Library/2022-05-22-16-08-01.gh-issue-89973.jc-Q4g.rst3
3 files changed, 140 insertions, 7 deletions
diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index 7c52c23..fee59bf 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -108,7 +108,7 @@ def translate(pat):
add('\\[')
else:
stuff = pat[i:j]
- if '--' not in stuff:
+ if '-' not in stuff:
stuff = stuff.replace('\\', r'\\')
else:
chunks = []
@@ -120,7 +120,16 @@ def translate(pat):
chunks.append(pat[i:k])
i = k+1
k = k+3
- chunks.append(pat[i:j])
+ chunk = pat[i:j]
+ if chunk:
+ chunks.append(chunk)
+ else:
+ chunks[-1] += '-'
+ # Remove empty ranges -- invalid in RE.
+ for k in range(len(chunks)-1, 0, -1):
+ if chunks[k-1][-1] > chunks[k][0]:
+ chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
+ del chunks[k]
# Escape backslashes and hyphens for set difference (--).
# Hyphens that create ranges shouldn't be escaped.
stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
@@ -128,11 +137,18 @@ def translate(pat):
# Escape set operations (&&, ~~ and ||).
stuff = re.sub(r'([&~|])', r'\\\1', stuff)
i = j+1
- if stuff[0] == '!':
- stuff = '^' + stuff[1:]
- elif stuff[0] in ('^', '['):
- stuff = '\\' + stuff
- add(f'[{stuff}]')
+ if not stuff:
+ # Empty range: never match.
+ add('(?!)')
+ elif stuff == '!':
+ # Negated empty range: match any character.
+ add('.')
+ else:
+ if stuff[0] == '!':
+ stuff = '^' + stuff[1:]
+ elif stuff[0] in ('^', '['):
+ stuff = '\\' + stuff
+ add(f'[{stuff}]')
else:
add(re.escape(c))
assert i == n
diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index 10668e4..8e2d274 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -2,6 +2,7 @@
import unittest
import os
+import string
import warnings
from fnmatch import fnmatch, fnmatchcase, translate, filter
@@ -91,6 +92,119 @@ class FnmatchTestCase(unittest.TestCase):
check('usr/bin', 'usr\\bin', normsep)
check('usr\\bin', 'usr\\bin')
+ def test_char_set(self):
+ ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
+ check = self.check_match
+ tescases = string.ascii_lowercase + string.digits + string.punctuation
+ for c in tescases:
+ check(c, '[az]', c in 'az')
+ check(c, '[!az]', c not in 'az')
+ # Case insensitive.
+ for c in tescases:
+ check(c, '[AZ]', (c in 'az') and ignorecase)
+ check(c, '[!AZ]', (c not in 'az') or not ignorecase)
+ for c in string.ascii_uppercase:
+ check(c, '[az]', (c in 'AZ') and ignorecase)
+ check(c, '[!az]', (c not in 'AZ') or not ignorecase)
+ # Repeated same character.
+ for c in tescases:
+ check(c, '[aa]', c == 'a')
+ # Special cases.
+ for c in tescases:
+ check(c, '[^az]', c in '^az')
+ check(c, '[[az]', c in '[az')
+ check(c, r'[!]]', c != ']')
+ check('[', '[')
+ check('[]', '[]')
+ check('[!', '[!')
+ check('[!]', '[!]')
+
+ def test_range(self):
+ ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
+ normsep = os.path.normcase('\\') == os.path.normcase('/')
+ check = self.check_match
+ tescases = string.ascii_lowercase + string.digits + string.punctuation
+ for c in tescases:
+ check(c, '[b-d]', c in 'bcd')
+ check(c, '[!b-d]', c not in 'bcd')
+ check(c, '[b-dx-z]', c in 'bcdxyz')
+ check(c, '[!b-dx-z]', c not in 'bcdxyz')
+ # Case insensitive.
+ for c in tescases:
+ check(c, '[B-D]', (c in 'bcd') and ignorecase)
+ check(c, '[!B-D]', (c not in 'bcd') or not ignorecase)
+ for c in string.ascii_uppercase:
+ check(c, '[b-d]', (c in 'BCD') and ignorecase)
+ check(c, '[!b-d]', (c not in 'BCD') or not ignorecase)
+ # Upper bound == lower bound.
+ for c in tescases:
+ check(c, '[b-b]', c == 'b')
+ # Special cases.
+ for c in tescases:
+ check(c, '[!-#]', c not in '-#')
+ check(c, '[!--.]', c not in '-.')
+ check(c, '[^-`]', c in '^_`')
+ if not (normsep and c == '/'):
+ check(c, '[[-^]', c in r'[\]^')
+ check(c, r'[\-^]', c in r'\]^')
+ check(c, '[b-]', c in '-b')
+ check(c, '[!b-]', c not in '-b')
+ check(c, '[-b]', c in '-b')
+ check(c, '[!-b]', c not in '-b')
+ check(c, '[-]', c in '-')
+ check(c, '[!-]', c not in '-')
+ # Upper bound is less than lower bound: error in RE.
+ for c in tescases:
+ check(c, '[d-b]', False)
+ check(c, '[!d-b]', True)
+ check(c, '[d-bx-z]', c in 'xyz')
+ check(c, '[!d-bx-z]', c not in 'xyz')
+ check(c, '[d-b^-`]', c in '^_`')
+ if not (normsep and c == '/'):
+ check(c, '[d-b[-^]', c in r'[\]^')
+
+ def test_sep_in_char_set(self):
+ normsep = os.path.normcase('\\') == os.path.normcase('/')
+ check = self.check_match
+ check('/', r'[/]')
+ check('\\', r'[\]')
+ check('/', r'[\]', normsep)
+ check('\\', r'[/]', normsep)
+ check('[/]', r'[/]', False)
+ check(r'[\\]', r'[/]', False)
+ check('\\', r'[\t]')
+ check('/', r'[\t]', normsep)
+ check('t', r'[\t]')
+ check('\t', r'[\t]', False)
+
+ def test_sep_in_range(self):
+ normsep = os.path.normcase('\\') == os.path.normcase('/')
+ check = self.check_match
+ check('a/b', 'a[.-0]b', not normsep)
+ check('a\\b', 'a[.-0]b', False)
+ check('a\\b', 'a[Z-^]b', not normsep)
+ check('a/b', 'a[Z-^]b', False)
+
+ check('a/b', 'a[/-0]b', not normsep)
+ check(r'a\b', 'a[/-0]b', False)
+ check('a[/-0]b', 'a[/-0]b', False)
+ check(r'a[\-0]b', 'a[/-0]b', False)
+
+ check('a/b', 'a[.-/]b')
+ check(r'a\b', 'a[.-/]b', normsep)
+ check('a[.-/]b', 'a[.-/]b', False)
+ check(r'a[.-\]b', 'a[.-/]b', False)
+
+ check(r'a\b', r'a[\-^]b')
+ check('a/b', r'a[\-^]b', normsep)
+ check(r'a[\-^]b', r'a[\-^]b', False)
+ check('a[/-^]b', r'a[\-^]b', False)
+
+ check(r'a\b', r'a[Z-\]b', not normsep)
+ check('a/b', r'a[Z-\]b', False)
+ check(r'a[Z-\]b', r'a[Z-\]b', False)
+ check('a[Z-/]b', r'a[Z-\]b', False)
+
def test_warnings(self):
with warnings.catch_warnings():
warnings.simplefilter('error', Warning)
diff --git a/Misc/NEWS.d/next/Library/2022-05-22-16-08-01.gh-issue-89973.jc-Q4g.rst b/Misc/NEWS.d/next/Library/2022-05-22-16-08-01.gh-issue-89973.jc-Q4g.rst
new file mode 100644
index 0000000..7e61fd7
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-05-22-16-08-01.gh-issue-89973.jc-Q4g.rst
@@ -0,0 +1,3 @@
+Fix :exc:`re.error` raised in :mod:`fnmatch` if the pattern contains a
+character range with upper bound lower than lower bound (e.g. ``[c-a]``).
+Now such ranges are interpreted as empty ranges.