From 05cb728d68a278d11466f9a6c8258d914135c96c Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 16 Nov 2017 12:38:26 +0200 Subject: bpo-30349: Raise FutureWarning for nested sets and set operations (#1553) in regular expressions. --- Doc/library/re.rst | 16 +++++++- Doc/tools/susp-ignored.csv | 2 +- Doc/whatsnew/3.7.rst | 11 +++++ Lib/email/_header_value_parser.py | 9 ++--- Lib/re.py | 3 +- Lib/sre_parse.py | 24 +++++++++++ Lib/test/test_re.py | 47 +++++++++++++++++++++- .../2017-10-05-12-45-29.bpo-30349.6zKJsF.rst | 3 ++ 8 files changed, 106 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2017-10-05-12-45-29.bpo-30349.6zKJsF.rst diff --git a/Doc/library/re.rst b/Doc/library/re.rst index cbb2f43..8c15462 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -200,6 +200,20 @@ The special characters are: place it at the beginning of the set. For example, both ``[()[\]{}]`` and ``[]()[{}]`` will both match a parenthesis. + * Support of nested sets and set operations as in `Unicode Technical + Standard #18`_ might be added in the future. This would change the + syntax, so to facilitate this change a :exc:`FutureWarning` will be raised + in ambiguous cases for the time being. + That includes sets starting with a literal ``'['`` or containing literal + character sequences ``'--'``, ``'&&'``, ``'~~'``, and ``'||'``. To + avoid a warning, escape them with a backslash. + + .. _Unicode Technical Standard #18: https://unicode.org/reports/tr18/ + + .. versionchanged:: 3.7 + :exc:`FutureWarning` is raised if a character set contains constructs + that will change semantically in the future. + ``|`` ``A|B``, where *A* and *B* can be arbitrary REs, creates a regular expression that will match either *A* or *B*. An arbitrary number of REs can be separated by the @@ -829,7 +843,7 @@ form. 
>>> legal_chars = string.ascii_lowercase + string.digits + "!#$%&'*+-.^_`|~:" >>> print('[%s]+' % re.escape(legal_chars)) - [abcdefghijklmnopqrstuvwxyz0123456789!\#\$%&'\*\+\-\.\^_`\|~:]+ + [abcdefghijklmnopqrstuvwxyz0123456789!\#\$%\&'\*\+\-\.\^_`\|\~:]+ >>> operators = ['+', '-', '*', '/', '**'] >>> print('|'.join(map(re.escape, sorted(operators, reverse=True)))) diff --git a/Doc/tools/susp-ignored.csv b/Doc/tools/susp-ignored.csv index 2b3ccf3..d52f81b 100644 --- a/Doc/tools/susp-ignored.csv +++ b/Doc/tools/susp-ignored.csv @@ -300,7 +300,7 @@ whatsnew/3.2,,:gz,">>> with tarfile.open(name='myarchive.tar.gz', mode='w:gz') a whatsnew/3.2,,:location,zope9-location = ${zope9:location} whatsnew/3.2,,:prefix,zope-conf = ${custom:prefix}/etc/zope.conf library/re,,`,!#$%&'*+-.^_`|~: -library/re,,`,!\#\$%&'\*\+\-\.\^_`\|~: +library/re,,`,!\#\$%\&'\*\+\-\.\^_`\|\~: library/tarfile,,:xz,'x:xz' library/xml.etree.elementtree,,:sometag,prefix:sometag library/xml.etree.elementtree,,:fictional,"@_`~' + LITERAL_CHARS = string.ascii_letters + string.digits + '!"%\',/:;<=>@_`' def test_re_escape(self): p = ''.join(chr(i) for i in range(256)) diff --git a/Misc/NEWS.d/next/Library/2017-10-05-12-45-29.bpo-30349.6zKJsF.rst b/Misc/NEWS.d/next/Library/2017-10-05-12-45-29.bpo-30349.6zKJsF.rst new file mode 100644 index 0000000..6862e02 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2017-10-05-12-45-29.bpo-30349.6zKJsF.rst @@ -0,0 +1,3 @@ +FutureWarning is now emitted if a regular expression contains character set +constructs that will change semantically in the future (nested sets and set +operations). -- cgit v0.12