diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2017-05-16 12:16:15 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-05-16 12:16:15 (GMT) |
commit | c7ac7280c321b3c1679fe5f657a6be0f86adf173 (patch) | |
tree | ce201ce147377ada98b1b4ad55526e2eecfaaa8f | |
parent | 87fa8a780e9045a26c735f085c07bba4b2d0be60 (diff) | |
download | cpython-c7ac7280c321b3c1679fe5f657a6be0f86adf173.zip cpython-c7ac7280c321b3c1679fe5f657a6be0f86adf173.tar.gz cpython-c7ac7280c321b3c1679fe5f657a6be0f86adf173.tar.bz2 |
bpo-30375: Correct the stacklevel of regex compiling warnings. (#1595)
Warnings emitted when compiling a regular expression now always point
to the line in the user code. Previously they could point into the internals
of the re module if emitted from inside groups or conditionals.
-rw-r--r-- | Lib/sre_parse.py | 21 | ||||
-rw-r--r-- | Lib/test/test_re.py | 17 | ||||
-rw-r--r-- | Misc/NEWS | 4 |
3 files changed, 29 insertions, 13 deletions
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index f72408f..d59d642 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -414,7 +414,7 @@ def _uniq(items): newitems.append(item) return newitems -def _parse_sub(source, state, verbose, nested=True): +def _parse_sub(source, state, verbose, nested): # parse an alternation: a|b|c items = [] @@ -422,7 +422,8 @@ def _parse_sub(source, state, verbose, nested=True): sourcematch = source.match start = source.tell() while True: - itemsappend(_parse(source, state, verbose, not nested and not items)) + itemsappend(_parse(source, state, verbose, nested + 1, + not nested and not items)) if not sourcematch("|"): break @@ -471,7 +472,7 @@ def _parse_sub(source, state, verbose, nested=True): subpattern.append((BRANCH, (None, items))) return subpattern -def _parse(source, state, verbose, first=False): +def _parse(source, state, verbose, nested, first=False): # parse a simple pattern subpattern = SubPattern(state) @@ -708,7 +709,7 @@ def _parse(source, state, verbose, first=False): lookbehindgroups = state.lookbehindgroups if lookbehindgroups is None: state.lookbehindgroups = state.groups - p = _parse_sub(source, state, verbose) + p = _parse_sub(source, state, verbose, nested + 1) if dir < 0: if lookbehindgroups is None: state.lookbehindgroups = None @@ -744,9 +745,9 @@ def _parse(source, state, verbose, first=False): msg = "invalid group reference %d" % condgroup raise source.error(msg, len(condname) + 1) state.checklookbehindgroup(condgroup, source) - item_yes = _parse(source, state, verbose) + item_yes = _parse(source, state, verbose, nested + 1) if source.match("|"): - item_no = _parse(source, state, verbose) + item_no = _parse(source, state, verbose, nested + 1) if source.next == "|": raise source.error("conditional backref with more than two branches") else: @@ -768,7 +769,7 @@ def _parse(source, state, verbose, first=False): source.string[:20], # truncate long regexes ' (truncated)' if len(source.string) > 20 else '', ), 
- DeprecationWarning, stacklevel=7 + DeprecationWarning, stacklevel=nested + 6 ) if (state.flags & SRE_FLAG_VERBOSE) and not verbose: raise Verbose @@ -788,7 +789,7 @@ def _parse(source, state, verbose, first=False): raise source.error(err.msg, len(name) + 1) from None sub_verbose = ((verbose or (add_flags & SRE_FLAG_VERBOSE)) and not (del_flags & SRE_FLAG_VERBOSE)) - p = _parse_sub(source, state, sub_verbose) + p = _parse_sub(source, state, sub_verbose, nested + 1) if not source.match(")"): raise source.error("missing ), unterminated subpattern", source.tell() - start) @@ -886,7 +887,7 @@ def parse(str, flags=0, pattern=None): pattern.str = str try: - p = _parse_sub(source, pattern, flags & SRE_FLAG_VERBOSE, False) + p = _parse_sub(source, pattern, flags & SRE_FLAG_VERBOSE, 0) except Verbose: # the VERBOSE flag was switched on inside the pattern. to be # on the safe side, we'll parse the whole thing again... @@ -894,7 +895,7 @@ def parse(str, flags=0, pattern=None): pattern.flags = flags | SRE_FLAG_VERBOSE pattern.str = str source.seek(0) - p = _parse_sub(source, pattern, True, False) + p = _parse_sub(source, pattern, True, 0) p.pattern.flags = fix_flags(str, p.pattern.flags) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 1bb2654..027df40 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -1370,6 +1370,7 @@ class ReTests(unittest.TestCase): str(warns.warnings[0].message), 'Flags not at the start of the expression %s' % p ) + self.assertEqual(warns.warnings[0].filename, __file__) p = upper_char + '(?i)%s' % ('.?' 
* 100) with self.assertWarns(DeprecationWarning) as warns: @@ -1378,6 +1379,7 @@ class ReTests(unittest.TestCase): str(warns.warnings[0].message), 'Flags not at the start of the expression %s (truncated)' % p[:20] ) + self.assertEqual(warns.warnings[0].filename, __file__) with self.assertWarns(DeprecationWarning): self.assertTrue(re.match('(?s).(?i)' + upper_char, '\n' + lower_char)) @@ -1389,14 +1391,23 @@ class ReTests(unittest.TestCase): self.assertTrue(re.match('^(?i)' + upper_char, lower_char)) with self.assertWarns(DeprecationWarning): self.assertTrue(re.match('$|(?i)' + upper_char, lower_char)) - with self.assertWarns(DeprecationWarning): + with self.assertWarns(DeprecationWarning) as warns: self.assertTrue(re.match('(?:(?i)' + upper_char + ')', lower_char)) - with self.assertWarns(DeprecationWarning): + self.assertRegex(str(warns.warnings[0].message), + 'Flags not at the start') + self.assertEqual(warns.warnings[0].filename, __file__) + with self.assertWarns(DeprecationWarning) as warns: self.assertTrue(re.fullmatch('(^)?(?(1)(?i)' + upper_char + ')', lower_char)) - with self.assertWarns(DeprecationWarning): + self.assertRegex(str(warns.warnings[0].message), + 'Flags not at the start') + self.assertEqual(warns.warnings[0].filename, __file__) + with self.assertWarns(DeprecationWarning) as warns: self.assertTrue(re.fullmatch('($)?(?(1)|(?i)' + upper_char + ')', lower_char)) + self.assertRegex(str(warns.warnings[0].message), + 'Flags not at the start') + self.assertEqual(warns.warnings[0].filename, __file__) def test_dollar_matches_twice(self): @@ -323,6 +323,10 @@ Extension Modules Library ------- +- bpo-30375: Warnings emitted when compile a regular expression now always + point to the line in the user code. Previously they could point into inners + of the re module if emitted from inside of groups or conditionals. 
+ - bpo-30329: imaplib and poplib now catch the Windows socket WSAEINVAL error (code 10022) on shutdown(SHUT_RDWR): An invalid operation was attempted. This error occurs sometimes on SSL connections. |