diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2022-04-30 10:16:27 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-30 10:16:27 (GMT) |
commit | 3483299a24e41a7f2e958369cb3573d7c2253e33 (patch) | |
tree | f51b90e97184249f71c6df08579a2f4a8899d43a | |
parent | a055dac0b45031878a8196a8735522de018491e3 (diff) | |
download | cpython-3483299a24e41a7f2e958369cb3573d7c2253e33.zip cpython-3483299a24e41a7f2e958369cb3573d7c2253e33.tar.gz cpython-3483299a24e41a7f2e958369cb3573d7c2253e33.tar.bz2 |
gh-81548: Deprecate octal escape sequences with value larger than 0o377 (GH-91668)
-rw-r--r-- | Doc/reference/lexical_analysis.rst | 5 | ||||
-rw-r--r-- | Doc/whatsnew/3.11.rst | 6 | ||||
-rw-r--r-- | Lib/test/test_codecs.py | 7 | ||||
-rw-r--r-- | Lib/test/test_string_literals.py | 53 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2022-04-18-20-25-01.gh-issue-81548.n3VYgp.rst | 3 | ||||
-rw-r--r-- | Objects/bytesobject.c | 29 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 29 | ||||
-rw-r--r-- | Parser/string_parser.c | 25 |
8 files changed, 139 insertions, 18 deletions
diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst index dba1a9d..0e64a05 100644 --- a/Doc/reference/lexical_analysis.rst +++ b/Doc/reference/lexical_analysis.rst @@ -596,6 +596,11 @@ Notes: (1) As in Standard C, up to three octal digits are accepted. + .. versionchanged:: 3.11 + Octal escapes with value larger than ``0o377`` produce a :exc:`DeprecationWarning`. + In a future Python version they will be a :exc:`SyntaxWarning` and + eventually a :exc:`SyntaxError`. + (2) Unlike in Standard C, exactly two hex digits are required. diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst index 1a692f2..59c8dd6 100644 --- a/Doc/whatsnew/3.11.rst +++ b/Doc/whatsnew/3.11.rst @@ -1055,6 +1055,12 @@ CPython bytecode changes Deprecated ========== +* Octal escapes with value larger than ``0o377`` now produce + a :exc:`DeprecationWarning`. + In a future Python version they will be a :exc:`SyntaxWarning` and + eventually a :exc:`SyntaxError`. + (Contributed by Serhiy Storchaka in :issue:`81548`.) + * The :mod:`lib2to3` package and ``2to3`` tool are now deprecated and may not be able to parse Python 3.10 or newer. See the :pep:`617` (New PEG parser for CPython). (Contributed by Victor Stinner in :issue:`40360`.) diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 5853e08..42c600d 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1193,7 +1193,6 @@ class EscapeDecodeTest(unittest.TestCase): check(br"[\418]", b"[!8]") check(br"[\101]", b"[A]") check(br"[\1010]", b"[A0]") - check(br"[\501]", b"[A]") check(br"[\x41]", b"[A]") check(br"[\x410]", b"[A0]") for i in range(97, 123): @@ -1209,6 +1208,9 @@ class EscapeDecodeTest(unittest.TestCase): check(br"\9", b"\\9") with self.assertWarns(DeprecationWarning): check(b"\\\xfa", b"\\\xfa") + for i in range(0o400, 0o1000): + with self.assertWarns(DeprecationWarning): + check(rb'\%o' % i, bytes([i & 0o377])) def test_errors(self): decode = codecs.escape_decode @@ -2435,6 +2437,9 @@ class UnicodeEscapeTest(ReadTest, unittest.TestCase): check(br"\9", "\\9") with self.assertWarns(DeprecationWarning): check(b"\\\xfa", "\\\xfa") + for i in range(0o400, 0o1000): + with self.assertWarns(DeprecationWarning): + check(rb'\%o' % i, chr(i)) def test_decode_errors(self): decode = codecs.unicode_escape_decode diff --git a/Lib/test/test_string_literals.py b/Lib/test/test_string_literals.py index 7231970..3a3830b 100644 --- a/Lib/test/test_string_literals.py +++ b/Lib/test/test_string_literals.py @@ -116,6 +116,7 @@ class TestLiterals(unittest.TestCase): warnings.simplefilter('always', category=DeprecationWarning) eval("'''\n\\z'''") self.assertEqual(len(w), 1) + self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'") self.assertEqual(w[0].filename, '<string>') self.assertEqual(w[0].lineno, 1) @@ -125,6 +126,32 @@ class TestLiterals(unittest.TestCase): eval("'''\n\\z'''") exc = cm.exception self.assertEqual(w, []) + self.assertEqual(exc.msg, r"invalid escape sequence '\z'") + self.assertEqual(exc.filename, '<string>') + self.assertEqual(exc.lineno, 1) + self.assertEqual(exc.offset, 1) + + def test_eval_str_invalid_octal_escape(self): + for i in range(0o400, 0o1000): + with self.assertWarns(DeprecationWarning): + self.assertEqual(eval(r"'\%o'" % i), chr(i)) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always', category=DeprecationWarning) + eval("'''\n\\407'''") + self.assertEqual(len(w), 1) + self.assertEqual(str(w[0].message), + r"invalid octal escape sequence '\407'") + self.assertEqual(w[0].filename, '<string>') + self.assertEqual(w[0].lineno, 1) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('error', category=DeprecationWarning) + with self.assertRaises(SyntaxError) as cm: + eval("'''\n\\407'''") + exc = cm.exception + self.assertEqual(w, []) + self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'") self.assertEqual(exc.filename, '<string>') self.assertEqual(exc.lineno, 1) self.assertEqual(exc.offset, 1) @@ -166,6 +193,7 @@ class TestLiterals(unittest.TestCase): warnings.simplefilter('always', category=DeprecationWarning) eval("b'''\n\\z'''") self.assertEqual(len(w), 1) + self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'") self.assertEqual(w[0].filename, '<string>') self.assertEqual(w[0].lineno, 1) @@ -175,6 +203,31 @@ class TestLiterals(unittest.TestCase): eval("b'''\n\\z'''") exc = cm.exception self.assertEqual(w, []) + self.assertEqual(exc.msg, r"invalid escape sequence '\z'") + self.assertEqual(exc.filename, '<string>') + self.assertEqual(exc.lineno, 1) + + def test_eval_bytes_invalid_octal_escape(self): + for i in range(0o400, 0o1000): + with self.assertWarns(DeprecationWarning): + self.assertEqual(eval(r"b'\%o'" % i), bytes([i & 0o377])) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always', category=DeprecationWarning) + eval("b'''\n\\407'''") + self.assertEqual(len(w), 1) + self.assertEqual(str(w[0].message), + r"invalid octal escape sequence '\407'") + self.assertEqual(w[0].filename, '<string>') + self.assertEqual(w[0].lineno, 1) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('error', category=DeprecationWarning) + with self.assertRaises(SyntaxError) as cm: + eval("b'''\n\\407'''") + exc = cm.exception + self.assertEqual(w, []) + self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'") self.assertEqual(exc.filename, '<string>') self.assertEqual(exc.lineno, 1) diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-04-18-20-25-01.gh-issue-81548.n3VYgp.rst b/Misc/NEWS.d/next/Core and Builtins/2022-04-18-20-25-01.gh-issue-81548.n3VYgp.rst new file mode 100644 index 0000000..56b1fd6 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-04-18-20-25-01.gh-issue-81548.n3VYgp.rst @@ -0,0 +1,3 @@ +Octal escapes with value larger than ``0o377`` now produce a +:exc:`DeprecationWarning`. In a future Python version they will be a +:exc:`SyntaxWarning` and eventually a :exc:`SyntaxError`. diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 510a836..b5066d0 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1113,6 +1113,12 @@ PyObject *_PyBytes_DecodeEscape(const char *s, if (s < end && '0' <= *s && *s <= '7') c = (c<<3) + *s++ - '0'; } + if (c > 0377) { + if (*first_invalid_escape == NULL) { + *first_invalid_escape = s-3; /* Back up 3 chars, since we've + already incremented s. */ + } + } *p++ = c; break; case 'x': @@ -1179,11 +1185,24 @@ PyObject *PyBytes_DecodeEscape(const char *s, if (result == NULL) return NULL; if (first_invalid_escape != NULL) { - if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "invalid escape sequence '\\%c'", - (unsigned char)*first_invalid_escape) < 0) { - Py_DECREF(result); - return NULL; + unsigned char c = *first_invalid_escape; + if ('4' <= c && c <= '7') { + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "invalid octal escape sequence '\\%.3s'", + first_invalid_escape) < 0) + { + Py_DECREF(result); + return NULL; + } + } + else { + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "invalid escape sequence '\\%c'", + c) < 0) + { + Py_DECREF(result); + return NULL; + } } } return result; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 7768f66..4933075 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6404,6 +6404,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, ch = (ch<<3) + *s++ - '0'; } } + if (ch > 0377) { + if (*first_invalid_escape == NULL) { + *first_invalid_escape = s-3; /* Back up 3 chars, since we've + already incremented s. */ + } + } WRITE_CHAR(ch); continue; @@ -6554,11 +6560,24 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s, if (result == NULL) return NULL; if (first_invalid_escape != NULL) { - if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "invalid escape sequence '\\%c'", - (unsigned char)*first_invalid_escape) < 0) { - Py_DECREF(result); - return NULL; + unsigned char c = *first_invalid_escape; + if ('4' <= c && c <= '7') { + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "invalid octal escape sequence '\\%.3s'", + first_invalid_escape) < 0) + { + Py_DECREF(result); + return NULL; + } + } + else { + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "invalid escape sequence '\\%c'", + c) < 0) + { + Py_DECREF(result); + return NULL; + } } } return result; diff --git a/Parser/string_parser.c b/Parser/string_parser.c index 65ddd46..9c12d8c 100644 --- a/Parser/string_parser.c +++ b/Parser/string_parser.c @@ -9,10 +9,15 @@ //// STRING HANDLING FUNCTIONS //// static int -warn_invalid_escape_sequence(Parser *p, unsigned char first_invalid_escape_char, Token *t) +warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token *t) { + unsigned char c = *first_invalid_escape; + int octal = ('4' <= c && c <= '7'); PyObject *msg = - PyUnicode_FromFormat("invalid escape sequence '\\%c'", first_invalid_escape_char); + octal + ? PyUnicode_FromFormat("invalid octal escape sequence '\\%.3s'", + first_invalid_escape) + : PyUnicode_FromFormat("invalid escape sequence '\\%c'", c); if (msg == NULL) { return -1; } @@ -27,7 +32,13 @@ warn_invalid_escape_sequence(Parser *p, unsigned char first_invalid_escape_char, since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the error location, if p->known_err_token is not set. */ p->known_err_token = t; - RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", first_invalid_escape_char); + if (octal) { + RAISE_SYNTAX_ERROR("invalid octal escape sequence '\\%.3s'", + first_invalid_escape); + } + else { + RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c); + } } Py_DECREF(msg); return -1; @@ -118,7 +129,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t) v = _PyUnicode_DecodeUnicodeEscapeInternal(s, len, NULL, NULL, &first_invalid_escape); if (v != NULL && first_invalid_escape != NULL) { - if (warn_invalid_escape_sequence(parser, *first_invalid_escape, t) < 0) { + if (warn_invalid_escape_sequence(parser, first_invalid_escape, t) < 0) { /* We have not decref u before because first_invalid_escape points inside u. */ Py_XDECREF(u); @@ -140,7 +151,7 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t) } if (first_invalid_escape != NULL) { - if (warn_invalid_escape_sequence(p, *first_invalid_escape, t) < 0) { + if (warn_invalid_escape_sequence(p, first_invalid_escape, t) < 0) { Py_DECREF(result); return NULL; } @@ -357,7 +368,7 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end, break; } } - + if (s == expr_end) { if (*expr_end == '!' || *expr_end == ':' || *expr_end == '=') { RAISE_SYNTAX_ERROR("f-string: expression required before '%c'", *expr_end); @@ -465,7 +476,7 @@ fstring_find_literal(Parser *p, const char **str, const char *end, int raw, decode_unicode_with_escapes(). */ continue; } - if (ch == '{' && warn_invalid_escape_sequence(p, ch, t) < 0) { + if (ch == '{' && warn_invalid_escape_sequence(p, s-1, t) < 0) { return -1; } } |