summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Lib/sre_parse.py12
-rw-r--r--Lib/test/test_re.py59
-rw-r--r--Misc/NEWS6
3 files changed, 56 insertions, 21 deletions
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index ab37fd3..d8d1bd5 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -412,7 +412,7 @@ def _parse_sub(source, state, verbose, nested=True):
sourcematch = source.match
start = source.tell()
while True:
- itemsappend(_parse(source, state, verbose))
+ itemsappend(_parse(source, state, verbose, not nested and not items))
if not sourcematch("|"):
break
@@ -466,7 +466,7 @@ def _parse_sub_cond(source, state, condgroup, verbose):
subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
return subpattern
-def _parse(source, state, verbose):
+def _parse(source, state, verbose, first=False):
# parse a simple pattern
subpattern = SubPattern(state)
@@ -730,10 +730,9 @@ def _parse(source, state, verbose):
state.checklookbehindgroup(condgroup, source)
elif char in FLAGS or char == "-":
# flags
- pos = source.pos
flags = _parse_flags(source, state, char)
if flags is None: # global flags
- if pos != 3: # "(?x"
+ if not first or subpattern:
import warnings
warnings.warn(
'Flags not at the start of the expression %s%s' % (
@@ -742,6 +741,8 @@ def _parse(source, state, verbose):
),
DeprecationWarning, stacklevel=7
)
+ if (state.flags & SRE_FLAG_VERBOSE) and not verbose:
+ raise Verbose
continue
add_flags, del_flags = flags
group = None
@@ -795,9 +796,6 @@ def _parse_flags(source, state, char):
msg = "unknown flag" if char.isalpha() else "missing -, : or )"
raise source.error(msg, len(char))
if char == ")":
- if ((add_flags & SRE_FLAG_VERBOSE) and
- not (state.flags & SRE_FLAG_VERBOSE)):
- raise Verbose
state.flags |= add_flags
return None
if add_flags & GLOBAL_FLAGS:
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 3129f7e..4d71eea 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1325,32 +1325,43 @@ class ReTests(unittest.TestCase):
upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below
lower_char = '\u1ea1' # Latin Small Letter A with Dot Below
- p = re.compile(upper_char, re.I | re.U)
- q = p.match(lower_char)
+ p = re.compile('.' + upper_char, re.I | re.S)
+ q = p.match('\n' + lower_char)
self.assertTrue(q)
- p = re.compile(lower_char, re.I | re.U)
- q = p.match(upper_char)
+ p = re.compile('.' + lower_char, re.I | re.S)
+ q = p.match('\n' + upper_char)
self.assertTrue(q)
- p = re.compile('(?i)' + upper_char, re.U)
- q = p.match(lower_char)
+ p = re.compile('(?i).' + upper_char, re.S)
+ q = p.match('\n' + lower_char)
self.assertTrue(q)
- p = re.compile('(?i)' + lower_char, re.U)
- q = p.match(upper_char)
+ p = re.compile('(?i).' + lower_char, re.S)
+ q = p.match('\n' + upper_char)
self.assertTrue(q)
- p = re.compile('(?iu)' + upper_char)
- q = p.match(lower_char)
+ p = re.compile('(?is).' + upper_char)
+ q = p.match('\n' + lower_char)
self.assertTrue(q)
- p = re.compile('(?iu)' + lower_char)
- q = p.match(upper_char)
+ p = re.compile('(?is).' + lower_char)
+ q = p.match('\n' + upper_char)
self.assertTrue(q)
- self.assertTrue(re.match('(?ixu) ' + upper_char, lower_char))
- self.assertTrue(re.match('(?ixu) ' + lower_char, upper_char))
+ p = re.compile('(?s)(?i).' + upper_char)
+ q = p.match('\n' + lower_char)
+ self.assertTrue(q)
+
+ p = re.compile('(?s)(?i).' + lower_char)
+ q = p.match('\n' + upper_char)
+ self.assertTrue(q)
+
+ self.assertTrue(re.match('(?ix) ' + upper_char, lower_char))
+ self.assertTrue(re.match('(?ix) ' + lower_char, upper_char))
+ self.assertTrue(re.match(' (?i) ' + upper_char, lower_char, re.X))
+ self.assertTrue(re.match('(?x) (?i) ' + upper_char, lower_char))
+ self.assertTrue(re.match(' (?x) (?i) ' + upper_char, lower_char, re.X))
p = upper_char + '(?i)'
with self.assertWarns(DeprecationWarning) as warns:
@@ -1368,6 +1379,26 @@ class ReTests(unittest.TestCase):
'Flags not at the start of the expression %s (truncated)' % p[:20]
)
+ with self.assertWarns(DeprecationWarning):
+ self.assertTrue(re.match('(?s).(?i)' + upper_char, '\n' + lower_char))
+ with self.assertWarns(DeprecationWarning):
+ self.assertTrue(re.match('(?i) ' + upper_char + ' (?x)', lower_char))
+ with self.assertWarns(DeprecationWarning):
+ self.assertTrue(re.match(' (?x) (?i) ' + upper_char, lower_char))
+ with self.assertWarns(DeprecationWarning):
+ self.assertTrue(re.match('^(?i)' + upper_char, lower_char))
+ with self.assertWarns(DeprecationWarning):
+ self.assertTrue(re.match('$|(?i)' + upper_char, lower_char))
+ with self.assertWarns(DeprecationWarning):
+ self.assertTrue(re.match('(?:(?i)' + upper_char + ')', lower_char))
+ with self.assertWarns(DeprecationWarning):
+ self.assertTrue(re.fullmatch('(^)?(?(1)(?i)' + upper_char + ')',
+ lower_char))
+ with self.assertWarns(DeprecationWarning):
+ self.assertTrue(re.fullmatch('($)?(?(1)|(?i)' + upper_char + ')',
+ lower_char))
+
+
def test_dollar_matches_twice(self):
"$ matches the end of string, and just before the terminating \n"
pattern = re.compile('$')
diff --git a/Misc/NEWS b/Misc/NEWS
index 7a79521..997a034 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -320,6 +320,12 @@ Extension Modules
Library
-------
+- bpo-30298: Weaken the condition of deprecation warnings for inline modifiers.
Several consecutive inline modifiers are now allowed at the start of the
pattern (e.g. ``'(?i)(?s)...'``). In verbose mode, whitespace and comments
are now allowed before and between inline modifiers (e.g.
``'(?x) (?i) (?s)...'``).
+
- bpo-30285: Optimized case-insensitive matching and searching of regular
expressions.