summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2017-11-16 10:38:26 (GMT)
committer: GitHub <noreply@github.com> 2017-11-16 10:38:26 (GMT)
commit: 05cb728d68a278d11466f9a6c8258d914135c96c (patch)
tree: da7fd67bdacf4239d820bcf40cad9f60cab9fb82 /Lib
parent: 3daaafb700df45716bb55f3a293f88773baf3463 (diff)
download: cpython-05cb728d68a278d11466f9a6c8258d914135c96c.zip
cpython-05cb728d68a278d11466f9a6c8258d914135c96c.tar.gz
cpython-05cb728d68a278d11466f9a6c8258d914135c96c.tar.bz2
bpo-30349: Raise FutureWarning for nested sets and set operations (#1553)
in regular expressions.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/email/_header_value_parser.py | 9
-rw-r--r-- Lib/re.py | 3
-rw-r--r-- Lib/sre_parse.py | 24
-rw-r--r-- Lib/test/test_re.py | 47
4 files changed, 76 insertions, 7 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 9b9697f..b4737c8 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -1354,15 +1354,14 @@ RouteComponentMarker = ValueTerminal('@', 'route-component-marker')
_wsp_splitter = re.compile(r'([{}]+)'.format(''.join(WSP))).split
_non_atom_end_matcher = re.compile(r"[^{}]+".format(
- ''.join(ATOM_ENDS).replace('\\','\\\\').replace(']',r'\]'))).match
+ re.escape(''.join(ATOM_ENDS)))).match
_non_printable_finder = re.compile(r"[\x00-\x20\x7F]").findall
_non_token_end_matcher = re.compile(r"[^{}]+".format(
- ''.join(TOKEN_ENDS).replace('\\','\\\\').replace(']',r'\]'))).match
+ re.escape(''.join(TOKEN_ENDS)))).match
_non_attribute_end_matcher = re.compile(r"[^{}]+".format(
- ''.join(ATTRIBUTE_ENDS).replace('\\','\\\\').replace(']',r'\]'))).match
+ re.escape(''.join(ATTRIBUTE_ENDS)))).match
_non_extended_attribute_end_matcher = re.compile(r"[^{}]+".format(
- ''.join(EXTENDED_ATTRIBUTE_ENDS).replace(
- '\\','\\\\').replace(']',r'\]'))).match
+ re.escape(''.join(EXTENDED_ATTRIBUTE_ENDS)))).match
def _validate_xtext(xtext):
"""If input token contains ASCII non-printables, register a defect."""
diff --git a/Lib/re.py b/Lib/re.py
index abbf8d6..a8b6753 100644
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -251,8 +251,9 @@ def template(pattern, flags=0):
# SPECIAL_CHARS
# closing ')', '}' and ']'
# '-' (a range in character set)
+# '&', '~', (extended character set operations)
# '#' (comment) and WHITESPACE (ignored) in verbose mode
-_special_chars_map = {i: '\\' + chr(i) for i in b'()[]{}?*+-|^$\\.# \t\n\r\v\f'}
+_special_chars_map = {i: '\\' + chr(i) for i in b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f'}
def escape(pattern):
"""
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index 8527412..a53735b 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -517,6 +517,12 @@ def _parse(source, state, verbose, nested, first=False):
setappend = set.append
## if sourcematch(":"):
## pass # handle character classes
+ if source.next == '[':
+ import warnings
+ warnings.warn(
+ 'Possible nested set at position %d' % source.tell(),
+ FutureWarning, stacklevel=nested + 6
+ )
negate = sourcematch("^")
# check remaining characters
while True:
@@ -529,6 +535,17 @@ def _parse(source, state, verbose, nested, first=False):
elif this[0] == "\\":
code1 = _class_escape(source, this)
else:
+ if set and this in '-&~|' and source.next == this:
+ import warnings
+ warnings.warn(
+ 'Possible set %s at position %d' % (
+ 'difference' if this == '-' else
+ 'intersection' if this == '&' else
+ 'symmetric difference' if this == '~' else
+ 'union',
+ source.tell() - 1),
+ FutureWarning, stacklevel=nested + 6
+ )
code1 = LITERAL, _ord(this)
if sourcematch("-"):
# potential range
@@ -545,6 +562,13 @@ def _parse(source, state, verbose, nested, first=False):
if that[0] == "\\":
code2 = _class_escape(source, that)
else:
+ if that == '-':
+ import warnings
+ warnings.warn(
+ 'Possible set difference at position %d' % (
+ source.tell() - 2),
+ FutureWarning, stacklevel=nested + 6
+ )
code2 = LITERAL, _ord(that)
if code1[0] != LITERAL or code2[0] != LITERAL:
msg = "bad character range %s-%s" % (this, that)
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index fc015e4..ee87446 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -914,6 +914,51 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.search(r"\s([^a])", " b").group(1), "b")
self.assertEqual(re.search(r"\s([^a]*)", " bb").group(1), "bb")
+ def test_possible_set_operations(self):
+ s = bytes(range(128)).decode()
+ with self.assertWarns(FutureWarning):
+ p = re.compile(r'[0-9--1]')
+ self.assertEqual(p.findall(s), list('-./0123456789'))
+ self.assertEqual(re.findall(r'[--1]', s), list('-./01'))
+ with self.assertWarns(FutureWarning):
+ p = re.compile(r'[%--1]')
+ self.assertEqual(p.findall(s), list("%&'()*+,-1"))
+ with self.assertWarns(FutureWarning):
+ p = re.compile(r'[%--]')
+ self.assertEqual(p.findall(s), list("%&'()*+,-"))
+
+ with self.assertWarns(FutureWarning):
+ p = re.compile(r'[0-9&&1]')
+ self.assertEqual(p.findall(s), list('&0123456789'))
+ with self.assertWarns(FutureWarning):
+ p = re.compile(r'[\d&&1]')
+ self.assertEqual(p.findall(s), list('&0123456789'))
+ self.assertEqual(re.findall(r'[&&1]', s), list('&1'))
+
+ with self.assertWarns(FutureWarning):
+ p = re.compile(r'[0-9||a]')
+ self.assertEqual(p.findall(s), list('0123456789a|'))
+ with self.assertWarns(FutureWarning):
+ p = re.compile(r'[\d||a]')
+ self.assertEqual(p.findall(s), list('0123456789a|'))
+ self.assertEqual(re.findall(r'[||1]', s), list('1|'))
+
+ with self.assertWarns(FutureWarning):
+ p = re.compile(r'[0-9~~1]')
+ self.assertEqual(p.findall(s), list('0123456789~'))
+ with self.assertWarns(FutureWarning):
+ p = re.compile(r'[\d~~1]')
+ self.assertEqual(p.findall(s), list('0123456789~'))
+ self.assertEqual(re.findall(r'[~~1]', s), list('1~'))
+
+ with self.assertWarns(FutureWarning):
+ p = re.compile(r'[[0-9]|]')
+ self.assertEqual(p.findall(s), list('0123456789[]'))
+
+ with self.assertWarns(FutureWarning):
+ p = re.compile(r'[[:digit:]|]')
+ self.assertEqual(p.findall(s), list(':[]dgit'))
+
def test_search_coverage(self):
self.assertEqual(re.search(r"\s(b)", " b").group(1), "b")
self.assertEqual(re.search(r"a\s", "a ").group(0), "a ")
@@ -932,7 +977,7 @@ class ReTests(unittest.TestCase):
self.assertEqual(m.group(), match)
self.assertEqual(m.span(), span)
- LITERAL_CHARS = string.ascii_letters + string.digits + '!"%&\',/:;<=>@_`~'
+ LITERAL_CHARS = string.ascii_letters + string.digits + '!"%\',/:;<=>@_`'
def test_re_escape(self):
p = ''.join(chr(i) for i in range(256))