summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2017-05-16 12:16:15 (GMT)
committer: GitHub <noreply@github.com> 2017-05-16 12:16:15 (GMT)
commit: c7ac7280c321b3c1679fe5f657a6be0f86adf173 (patch)
tree: ce201ce147377ada98b1b4ad55526e2eecfaaa8f
parent: 87fa8a780e9045a26c735f085c07bba4b2d0be60 (diff)
download: cpython-c7ac7280c321b3c1679fe5f657a6be0f86adf173.zip
cpython-c7ac7280c321b3c1679fe5f657a6be0f86adf173.tar.gz
cpython-c7ac7280c321b3c1679fe5f657a6be0f86adf173.tar.bz2
bpo-30375: Correct the stacklevel of regex compiling warnings. (#1595)
Warnings emitted when compiling a regular expression now always point to the line in the user code. Previously they could point into the internals of the re module if emitted from inside groups or conditionals.
-rw-r--r-- Lib/sre_parse.py 21
-rw-r--r-- Lib/test/test_re.py 17
-rw-r--r-- Misc/NEWS 4
3 files changed, 29 insertions, 13 deletions
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index f72408f..d59d642 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -414,7 +414,7 @@ def _uniq(items):
newitems.append(item)
return newitems
-def _parse_sub(source, state, verbose, nested=True):
+def _parse_sub(source, state, verbose, nested):
# parse an alternation: a|b|c
items = []
@@ -422,7 +422,8 @@ def _parse_sub(source, state, verbose, nested=True):
sourcematch = source.match
start = source.tell()
while True:
- itemsappend(_parse(source, state, verbose, not nested and not items))
+ itemsappend(_parse(source, state, verbose, nested + 1,
+ not nested and not items))
if not sourcematch("|"):
break
@@ -471,7 +472,7 @@ def _parse_sub(source, state, verbose, nested=True):
subpattern.append((BRANCH, (None, items)))
return subpattern
-def _parse(source, state, verbose, first=False):
+def _parse(source, state, verbose, nested, first=False):
# parse a simple pattern
subpattern = SubPattern(state)
@@ -708,7 +709,7 @@ def _parse(source, state, verbose, first=False):
lookbehindgroups = state.lookbehindgroups
if lookbehindgroups is None:
state.lookbehindgroups = state.groups
- p = _parse_sub(source, state, verbose)
+ p = _parse_sub(source, state, verbose, nested + 1)
if dir < 0:
if lookbehindgroups is None:
state.lookbehindgroups = None
@@ -744,9 +745,9 @@ def _parse(source, state, verbose, first=False):
msg = "invalid group reference %d" % condgroup
raise source.error(msg, len(condname) + 1)
state.checklookbehindgroup(condgroup, source)
- item_yes = _parse(source, state, verbose)
+ item_yes = _parse(source, state, verbose, nested + 1)
if source.match("|"):
- item_no = _parse(source, state, verbose)
+ item_no = _parse(source, state, verbose, nested + 1)
if source.next == "|":
raise source.error("conditional backref with more than two branches")
else:
@@ -768,7 +769,7 @@ def _parse(source, state, verbose, first=False):
source.string[:20], # truncate long regexes
' (truncated)' if len(source.string) > 20 else '',
),
- DeprecationWarning, stacklevel=7
+ DeprecationWarning, stacklevel=nested + 6
)
if (state.flags & SRE_FLAG_VERBOSE) and not verbose:
raise Verbose
@@ -788,7 +789,7 @@ def _parse(source, state, verbose, first=False):
raise source.error(err.msg, len(name) + 1) from None
sub_verbose = ((verbose or (add_flags & SRE_FLAG_VERBOSE)) and
not (del_flags & SRE_FLAG_VERBOSE))
- p = _parse_sub(source, state, sub_verbose)
+ p = _parse_sub(source, state, sub_verbose, nested + 1)
if not source.match(")"):
raise source.error("missing ), unterminated subpattern",
source.tell() - start)
@@ -886,7 +887,7 @@ def parse(str, flags=0, pattern=None):
pattern.str = str
try:
- p = _parse_sub(source, pattern, flags & SRE_FLAG_VERBOSE, False)
+ p = _parse_sub(source, pattern, flags & SRE_FLAG_VERBOSE, 0)
except Verbose:
# the VERBOSE flag was switched on inside the pattern. to be
# on the safe side, we'll parse the whole thing again...
@@ -894,7 +895,7 @@ def parse(str, flags=0, pattern=None):
pattern.flags = flags | SRE_FLAG_VERBOSE
pattern.str = str
source.seek(0)
- p = _parse_sub(source, pattern, True, False)
+ p = _parse_sub(source, pattern, True, 0)
p.pattern.flags = fix_flags(str, p.pattern.flags)
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 1bb2654..027df40 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1370,6 +1370,7 @@ class ReTests(unittest.TestCase):
str(warns.warnings[0].message),
'Flags not at the start of the expression %s' % p
)
+ self.assertEqual(warns.warnings[0].filename, __file__)
p = upper_char + '(?i)%s' % ('.?' * 100)
with self.assertWarns(DeprecationWarning) as warns:
@@ -1378,6 +1379,7 @@ class ReTests(unittest.TestCase):
str(warns.warnings[0].message),
'Flags not at the start of the expression %s (truncated)' % p[:20]
)
+ self.assertEqual(warns.warnings[0].filename, __file__)
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.match('(?s).(?i)' + upper_char, '\n' + lower_char))
@@ -1389,14 +1391,23 @@ class ReTests(unittest.TestCase):
self.assertTrue(re.match('^(?i)' + upper_char, lower_char))
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.match('$|(?i)' + upper_char, lower_char))
- with self.assertWarns(DeprecationWarning):
+ with self.assertWarns(DeprecationWarning) as warns:
self.assertTrue(re.match('(?:(?i)' + upper_char + ')', lower_char))
- with self.assertWarns(DeprecationWarning):
+ self.assertRegex(str(warns.warnings[0].message),
+ 'Flags not at the start')
+ self.assertEqual(warns.warnings[0].filename, __file__)
+ with self.assertWarns(DeprecationWarning) as warns:
self.assertTrue(re.fullmatch('(^)?(?(1)(?i)' + upper_char + ')',
lower_char))
- with self.assertWarns(DeprecationWarning):
+ self.assertRegex(str(warns.warnings[0].message),
+ 'Flags not at the start')
+ self.assertEqual(warns.warnings[0].filename, __file__)
+ with self.assertWarns(DeprecationWarning) as warns:
self.assertTrue(re.fullmatch('($)?(?(1)|(?i)' + upper_char + ')',
lower_char))
+ self.assertRegex(str(warns.warnings[0].message),
+ 'Flags not at the start')
+ self.assertEqual(warns.warnings[0].filename, __file__)
def test_dollar_matches_twice(self):
diff --git a/Misc/NEWS b/Misc/NEWS
index 39f583f..5375073 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -323,6 +323,10 @@ Extension Modules
Library
-------
+- bpo-30375: Warnings emitted when compile a regular expression now always
+ point to the line in the user code. Previously they could point into inners
+ of the re module if emitted from inside of groups or conditionals.
+
- bpo-30329: imaplib and poplib now catch the Windows socket WSAEINVAL error
(code 10022) on shutdown(SHUT_RDWR): An invalid operation was attempted.
This error occurs sometimes on SSL connections.