summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: R David Murray <rdmurray@bitdance.com> 2016-09-08 19:34:08 (GMT)
committer: R David Murray <rdmurray@bitdance.com> 2016-09-08 19:34:08 (GMT)
commit: 110b6fecbbb86143a4acb568f50eab2c870e7d34 (patch)
tree: ccc11fe14604c7c08bb750b392f67559a3cfd962
parent: 186122ead26f3ae4c2bc9f6715d2a29d339fdc5a (diff)
download: cpython-110b6fecbbb86143a4acb568f50eab2c870e7d34.zip
cpython-110b6fecbbb86143a4acb568f50eab2c870e7d34.tar.gz
cpython-110b6fecbbb86143a4acb568f50eab2c870e7d34.tar.bz2
#27364: Deprecate invalid escape strings in str/bytes.
Patch by Emanuel Barry, reviewed by Serhiy Storchaka and Martin Panter.
-rw-r--r-- Doc/reference/lexical_analysis.rst 4
-rw-r--r-- Doc/whatsnew/3.6.rst 5
-rw-r--r-- Lib/test/test_codecs.py 35
-rw-r--r-- Lib/test/test_unicode.py 7
-rw-r--r-- Misc/NEWS 3
-rw-r--r-- Objects/bytesobject.c 3
-rw-r--r-- Objects/unicodeobject.c 3
7 files changed, 48 insertions, 12 deletions
diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst
index b3b71af..48f2043 100644
--- a/Doc/reference/lexical_analysis.rst
+++ b/Doc/reference/lexical_analysis.rst
@@ -560,6 +560,10 @@ is more easily recognized as broken.) It is also important to note that the
escape sequences only recognized in string literals fall into the category of
unrecognized escapes for bytes literals.
+ .. versionchanged:: 3.6
+ Unrecognized escape sequences produce a DeprecationWarning. In
+ some future version of Python they will be a SyntaxError.
+
Even in a raw literal, quotes can be escaped with a backslash, but the
backslash remains in the result; for example, ``r"\""`` is a valid string
literal consisting of two characters: a backslash and a double quote; ``r"\"``
diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst
index e53d48e..a76ac9d 100644
--- a/Doc/whatsnew/3.6.rst
+++ b/Doc/whatsnew/3.6.rst
@@ -952,6 +952,11 @@ Deprecated features
parameter will be dropped in a future Python release and likely earlier
through third party tools. See :issue:`27919` for details.
+* A backslash-character pair that is not a valid escape sequence now generates
+ a DeprecationWarning. Although this will eventually become a SyntaxError,
+ that will not be for several Python releases. (Contributed by Emanuel Barry
+ in :issue:`27364`.)
+
Deprecated Python behavior
--------------------------
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 1af5524..4d91a07 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1175,7 +1175,7 @@ class EscapeDecodeTest(unittest.TestCase):
check(b"[\\\n]", b"[]")
check(br'[\"]', b'["]')
check(br"[\']", b"[']")
- check(br"[\\]", br"[\]")
+ check(br"[\\]", b"[\\]")
check(br"[\a]", b"[\x07]")
check(br"[\b]", b"[\x08]")
check(br"[\t]", b"[\x09]")
@@ -1184,7 +1184,6 @@ class EscapeDecodeTest(unittest.TestCase):
check(br"[\f]", b"[\x0c]")
check(br"[\r]", b"[\x0d]")
check(br"[\7]", b"[\x07]")
- check(br"[\8]", br"[\8]")
check(br"[\78]", b"[\x078]")
check(br"[\41]", b"[!]")
check(br"[\418]", b"[!8]")
@@ -1192,12 +1191,18 @@ class EscapeDecodeTest(unittest.TestCase):
check(br"[\1010]", b"[A0]")
check(br"[\501]", b"[A]")
check(br"[\x41]", b"[A]")
- check(br"[\X41]", br"[\X41]")
check(br"[\x410]", b"[A0]")
- for b in range(256):
- if b not in b'\n"\'\\abtnvfr01234567x':
- b = bytes([b])
- check(b'\\' + b, b'\\' + b)
+ for i in range(97, 123):
+ b = bytes([i])
+ if b not in b'abfnrtvx':
+ with self.assertWarns(DeprecationWarning):
+ check(b"\\" + b, b"\\" + b)
+ with self.assertWarns(DeprecationWarning):
+ check(b"\\" + b.upper(), b"\\" + b.upper())
+ with self.assertWarns(DeprecationWarning):
+ check(br"\8", b"\\8")
+ with self.assertWarns(DeprecationWarning):
+ check(br"\9", b"\\9")
def test_errors(self):
decode = codecs.escape_decode
@@ -2448,7 +2453,6 @@ class UnicodeEscapeTest(unittest.TestCase):
check(br"[\f]", "[\x0c]")
check(br"[\r]", "[\x0d]")
check(br"[\7]", "[\x07]")
- check(br"[\8]", r"[\8]")
check(br"[\78]", "[\x078]")
check(br"[\41]", "[!]")
check(br"[\418]", "[!8]")
@@ -2458,9 +2462,18 @@ class UnicodeEscapeTest(unittest.TestCase):
check(br"[\x410]", "[A0]")
check(br"\u20ac", "\u20ac")
check(br"\U0001d120", "\U0001d120")
- for b in range(256):
- if b not in b'\n"\'\\abtnvfr01234567xuUN':
- check(b'\\' + bytes([b]), '\\' + chr(b))
+ for i in range(97, 123):
+ b = bytes([i])
+ if b not in b'abfnrtuvx':
+ with self.assertWarns(DeprecationWarning):
+ check(b"\\" + b, "\\" + chr(i))
+ if b.upper() not in b'UN':
+ with self.assertWarns(DeprecationWarning):
+ check(b"\\" + b.upper(), "\\" + chr(i-32))
+ with self.assertWarns(DeprecationWarning):
+ check(br"\8", "\\8")
+ with self.assertWarns(DeprecationWarning):
+ check(br"\9", "\\9")
def test_decode_errors(self):
decode = codecs.unicode_escape_decode
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 9ab624e..2684b94 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -10,6 +10,7 @@ import codecs
import itertools
import operator
import struct
+import string
import sys
import unittest
import warnings
@@ -2752,6 +2753,12 @@ class UnicodeTest(string_tests.CommonTest,
support.check_free_after_iterating(self, iter, str)
support.check_free_after_iterating(self, reversed, str)
+ def test_invalid_sequences(self):
+ for letter in string.ascii_letters + "89": # 0-7 are octal escapes
+ if letter in "abfnrtuvxNU":
+ continue
+ with self.assertWarns(DeprecationWarning):
+ eval(r"'\%s'" % letter)
class StringModuleTest(unittest.TestCase):
def test_formatter_parser(self):
diff --git a/Misc/NEWS b/Misc/NEWS
index a55400f..8f1b724 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.6.0 beta 1
Core and Builtins
-----------------
+- Issue #27364: A backslash-character pair that is not a valid escape sequence
+ now generates a DeprecationWarning.
+
- Issue #27350: `dict` implementation is changed like PyPy. It is more compact
and preserves insertion order.
(Concept developed by Raymond Hettinger and patch by Inada Naoki.)
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index b0d9b39..6e7c4fa 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -1207,8 +1207,9 @@ PyObject *PyBytes_DecodeEscape(const char *s,
break;
default:
+ if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "invalid escape sequence '\\%c'", *(--s)) < 0)
+ goto failed;
*p++ = '\\';
- s--;
goto non_esc; /* an arbitrary number of unescaped
UTF-8 bytes may follow. */
}
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 7979eec..e0c3bfe 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6065,6 +6065,9 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
goto error;
default:
+ if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+ "invalid escape sequence '\\%c'", c) < 0)
+ goto onError;
WRITE_ASCII_CHAR('\\');
WRITE_CHAR(c);
continue;