diff options
-rw-r--r-- | Doc/library/re.rst | 8 | ||||
-rw-r--r-- | Doc/whatsnew/3.6.rst | 9 | ||||
-rw-r--r-- | Lib/distutils/filelist.py | 15 | ||||
-rw-r--r-- | Lib/distutils/tests/test_filelist.py | 14 | ||||
-rw-r--r-- | Lib/fnmatch.py | 2 | ||||
-rw-r--r-- | Lib/http/cookies.py | 3 | ||||
-rw-r--r-- | Lib/sre_parse.py | 8 | ||||
-rwxr-xr-x | Lib/test/re_tests.py | 8 | ||||
-rw-r--r-- | Lib/test/test_fnmatch.py | 16 | ||||
-rw-r--r-- | Lib/test/test_pyclbr.py | 2 | ||||
-rw-r--r-- | Lib/test/test_re.py | 3 | ||||
-rw-r--r-- | Misc/NEWS | 4 |
12 files changed, 58 insertions, 34 deletions
diff --git a/Doc/library/re.rst b/Doc/library/re.rst index 5297f0b..87cd553 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -224,12 +224,8 @@ The special characters are: flags are described in :ref:`contents-of-module-re`.) This is useful if you wish to include the flags as part of the regular expression, instead of passing a *flag* argument to the - :func:`re.compile` function. - - Note that the ``(?x)`` flag changes how the expression is parsed. It should be - used first in the expression string, or after one or more whitespace characters. - If there are non-whitespace characters before the flag, the results are - undefined. + :func:`re.compile` function. Flags should be used first in the + expression string. ``(?:...)`` A non-capturing version of regular parentheses. Matches whatever regular diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst index 6bb3469..8752b83 100644 --- a/Doc/whatsnew/3.6.rst +++ b/Doc/whatsnew/3.6.rst @@ -1124,6 +1124,15 @@ Deprecated features that will not be for several Python releases. (Contributed by Emanuel Barry in :issue:`27364`.) +* Inline flags ``(?letters)`` should now be used only at the start of the + regular expression. Inline flags in the middle of the regular expression + affect global flags in the Python :mod:`re` module. This differs from + other regular expression engines, which either apply flags to only part of + the regular expression or treat them as an error. To avoid confusion, + inline flags in the middle of the regular expression now emit a deprecation + warning. This will be an error in future Python releases. + (Contributed by Serhiy Storchaka in :issue:`22493`.) 
+ Deprecated Python behavior -------------------------- diff --git a/Lib/distutils/filelist.py b/Lib/distutils/filelist.py index 6522e69..c92d5fd 100644 --- a/Lib/distutils/filelist.py +++ b/Lib/distutils/filelist.py @@ -302,21 +302,26 @@ def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0): else: return pattern + # ditch start and end characters + start, _, end = glob_to_re('_').partition('_') + if pattern: pattern_re = glob_to_re(pattern) + assert pattern_re.startswith(start) and pattern_re.endswith(end) else: pattern_re = '' if prefix is not None: - # ditch end of pattern character - empty_pattern = glob_to_re('') - prefix_re = glob_to_re(prefix)[:-len(empty_pattern)] + prefix_re = glob_to_re(prefix) + assert prefix_re.startswith(start) and prefix_re.endswith(end) + prefix_re = prefix_re[len(start): len(prefix_re) - len(end)] sep = os.sep if os.sep == '\\': sep = r'\\' - pattern_re = "^" + sep.join((prefix_re, ".*" + pattern_re)) + pattern_re = pattern_re[len(start): len(pattern_re) - len(end)] + pattern_re = r'%s\A%s%s.*%s%s' % (start, prefix_re, sep, pattern_re, end) else: # no prefix -- respect anchor flag if anchor: - pattern_re = "^" + pattern_re + pattern_re = r'%s\A%s' % (start, pattern_re[len(start):]) return re.compile(pattern_re) diff --git a/Lib/distutils/tests/test_filelist.py b/Lib/distutils/tests/test_filelist.py index 391af3c..c71342d 100644 --- a/Lib/distutils/tests/test_filelist.py +++ b/Lib/distutils/tests/test_filelist.py @@ -51,14 +51,14 @@ class FileListTestCase(support.LoggingSilencer, for glob, regex in ( # simple cases - ('foo*', r'foo[^%(sep)s]*\Z(?ms)'), - ('foo?', r'foo[^%(sep)s]\Z(?ms)'), - ('foo??', r'foo[^%(sep)s][^%(sep)s]\Z(?ms)'), + ('foo*', r'(?s:foo[^%(sep)s]*)\Z'), + ('foo?', r'(?s:foo[^%(sep)s])\Z'), + ('foo??', r'(?s:foo[^%(sep)s][^%(sep)s])\Z'), # special cases - (r'foo\\*', r'foo\\\\[^%(sep)s]*\Z(?ms)'), - (r'foo\\\*', r'foo\\\\\\[^%(sep)s]*\Z(?ms)'), - ('foo????', 
r'foo[^%(sep)s][^%(sep)s][^%(sep)s][^%(sep)s]\Z(?ms)'), - (r'foo\\??', r'foo\\\\[^%(sep)s][^%(sep)s]\Z(?ms)')): + (r'foo\\*', r'(?s:foo\\\\[^%(sep)s]*)\Z'), + (r'foo\\\*', r'(?s:foo\\\\\\[^%(sep)s]*)\Z'), + ('foo????', r'(?s:foo[^%(sep)s][^%(sep)s][^%(sep)s][^%(sep)s])\Z'), + (r'foo\\??', r'(?s:foo\\\\[^%(sep)s][^%(sep)s])\Z')): regex = regex % {'sep': sep} self.assertEqual(glob_to_re(glob), regex) diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index 07b1229..fd3b514 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -106,4 +106,4 @@ def translate(pat): res = '%s[%s]' % (res, stuff) else: res = res + re.escape(c) - return res + r'\Z(?ms)' + return r'(?s:%s)\Z' % res diff --git a/Lib/http/cookies.py b/Lib/http/cookies.py index f078da5..be3b080 100644 --- a/Lib/http/cookies.py +++ b/Lib/http/cookies.py @@ -458,7 +458,6 @@ class Morsel(dict): _LegalKeyChars = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=" _LegalValueChars = _LegalKeyChars + r'\[\]' _CookiePattern = re.compile(r""" - (?x) # This is a verbose pattern \s* # Optional whitespace at start of cookie (?P<key> # Start of group 'key' [""" + _LegalKeyChars + r"""]+? # Any word of at least one letter @@ -475,7 +474,7 @@ _CookiePattern = re.compile(r""" )? # End of optional value group \s* # Any number of spaces. (\s+|;|$) # Ending either at space, semicolon, or EOS. - """, re.ASCII) # May be removed if safe. + """, re.ASCII | re.VERBOSE) # re.ASCII may be removed if safe. # At long last, here is the cookie class. 
Using this class is almost just like diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index d74e93f..4a77f0c 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -279,6 +279,9 @@ class Tokenizer: break result += c return result + @property + def pos(self): + return self.index - len(self.next or '') def tell(self): return self.index - len(self.next or '') def seek(self, index): @@ -727,8 +730,13 @@ def _parse(source, state, verbose): state.checklookbehindgroup(condgroup, source) elif char in FLAGS or char == "-": # flags + pos = source.pos flags = _parse_flags(source, state, char) if flags is None: # global flags + if pos != 3: # "(?x" + import warnings + warnings.warn('Flags not at the start of the expression', + DeprecationWarning, stacklevel=7) continue add_flags, del_flags = flags group = None diff --git a/Lib/test/re_tests.py b/Lib/test/re_tests.py index d3692f8..a379d33 100755 --- a/Lib/test/re_tests.py +++ b/Lib/test/re_tests.py @@ -106,8 +106,8 @@ tests = [ ('a.*b', 'acc\nccb', FAIL), ('a.{4,5}b', 'acc\nccb', FAIL), ('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'), - ('a.b(?s)', 'a\nb', SUCCEED, 'found', 'a\nb'), - ('a.*(?s)b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'), + ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'), + ('(?s)a.*b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'), ('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'), ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'), @@ -563,7 +563,7 @@ tests = [ # Check odd placement of embedded pattern modifiers # not an error under PCRE/PRE: - ('w(?i)', 'W', SUCCEED, 'found', 'W'), + ('(?i)w', 'W', SUCCEED, 'found', 'W'), # ('w(?i)', 'W', SYNTAX_ERROR), # Comments using the x embedded pattern modifier @@ -627,7 +627,7 @@ xyzabc # bug 114033: nothing to repeat (r'(x?)?', 'x', SUCCEED, 'found', 'x'), # bug 115040: rescan if flags are modified inside pattern - (r' (?x)foo ', 'foo', SUCCEED, 'found', 'foo'), + (r'(?x) foo ', 'foo', SUCCEED, 'found', 'foo'), # bug 115618: negative lookahead (r'(?<!abc)(d.f)', 
'abcdefdof', SUCCEED, 'found', 'dof'), # bug 116251: character class bug diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py index a5f5832..fb74246 100644 --- a/Lib/test/test_fnmatch.py +++ b/Lib/test/test_fnmatch.py @@ -62,14 +62,14 @@ class FnmatchTestCase(unittest.TestCase): class TranslateTestCase(unittest.TestCase): def test_translate(self): - self.assertEqual(translate('*'), r'.*\Z(?ms)') - self.assertEqual(translate('?'), r'.\Z(?ms)') - self.assertEqual(translate('a?b*'), r'a.b.*\Z(?ms)') - self.assertEqual(translate('[abc]'), r'[abc]\Z(?ms)') - self.assertEqual(translate('[]]'), r'[]]\Z(?ms)') - self.assertEqual(translate('[!x]'), r'[^x]\Z(?ms)') - self.assertEqual(translate('[^x]'), r'[\^x]\Z(?ms)') - self.assertEqual(translate('[x'), r'\[x\Z(?ms)') + self.assertEqual(translate('*'), r'(?s:.*)\Z') + self.assertEqual(translate('?'), r'(?s:.)\Z') + self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z') + self.assertEqual(translate('[abc]'), r'(?s:[abc])\Z') + self.assertEqual(translate('[]]'), r'(?s:[]])\Z') + self.assertEqual(translate('[!x]'), r'(?s:[^x])\Z') + self.assertEqual(translate('[^x]'), r'(?s:[\^x])\Z') + self.assertEqual(translate('[x'), r'(?s:\[x)\Z') class FilterTestCase(unittest.TestCase): diff --git a/Lib/test/test_pyclbr.py b/Lib/test/test_pyclbr.py index 06c10c1..2cff1c5 100644 --- a/Lib/test/test_pyclbr.py +++ b/Lib/test/test_pyclbr.py @@ -158,7 +158,7 @@ class PyclbrTest(TestCase): cm('cgi', ignore=('log',)) # set with = in module cm('pickle', ignore=('partial',)) cm('aifc', ignore=('openfp', '_aifc_params')) # set with = in module - cm('sre_parse', ignore=('dump', 'groups')) # from sre_constants import *; property + cm('sre_parse', ignore=('dump', 'groups', 'pos')) # from sre_constants import *; property cm('pdb') cm('pydoc') diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index afe8738..79a7a05 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -1279,6 +1279,9 @@ class ReTests(unittest.TestCase): 
self.assertTrue(re.match('(?ixu) ' + upper_char, lower_char)) self.assertTrue(re.match('(?ixu) ' + lower_char, upper_char)) + with self.assertWarns(DeprecationWarning): + self.assertTrue(re.match(upper_char + '(?i)', lower_char)) + def test_dollar_matches_twice(self): "$ matches the end of string, and just before the terminating \n" pattern = re.compile('$') @@ -143,6 +143,10 @@ Core and Builtins Library ------- +- Issue #22493: Inline flags should now be used only at the start of the + regular expression. A deprecation warning is emitted if they are used in the + middle of the regular expression. + - Issue #26885: xmlrpc now supports unmarshalling additional data types used by Apache XML-RPC implementation for numerics and None. |