summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Lysandros Nikolaou <lisandrosnik@gmail.com>, 2023-05-07 10:12:04 (GMT)
committer: GitHub <noreply@github.com>, 2023-05-07 10:12:04 (GMT)
commit: a09d3901a5329fd58a29f730ae5f48fb38f66320 (patch)
tree: a8504553d8184a6281dc8099503559f494482d22
parent: c5dafeaa6d2dddd1d9e611424d8abf3a934880c6 (diff)
download: cpython-a09d3901a5329fd58a29f730ae5f48fb38f66320.zip
download: cpython-a09d3901a5329fd58a29f730ae5f48fb38f66320.tar.gz
download: cpython-a09d3901a5329fd58a29f730ae5f48fb38f66320.tar.bz2
[3.11] gh-96670: Raise SyntaxError when parsing NULL bytes (GH-97594) (#104195)
-rw-r--r-- Include/cpython/fileobject.h | 1
-rw-r--r-- Lib/test/test_ast.py | 4
-rw-r--r-- Lib/test/test_builtin.py | 3
-rw-r--r-- Lib/test/test_cmd_line_script.py | 25
-rw-r--r-- Lib/test/test_compile.py | 8
-rw-r--r-- Misc/NEWS.d/next/Core and Builtins/2022-09-27-11-59-13.gh-issue-96670.XrBBit.rst | 2
-rw-r--r-- Objects/fileobject.c | 29
-rw-r--r-- Parser/tokenizer.c | 25
-rw-r--r-- Python/pythonrun.c | 2
9 files changed, 77 insertions, 22 deletions
diff --git a/Include/cpython/fileobject.h b/Include/cpython/fileobject.h
index cff2243..b70ec31 100644
--- a/Include/cpython/fileobject.h
+++ b/Include/cpython/fileobject.h
@@ -3,6 +3,7 @@
#endif
PyAPI_FUNC(char *) Py_UniversalNewlineFgets(char *, int, FILE*, PyObject *);
+PyAPI_FUNC(char *) _Py_UniversalNewlineFgetsWithSize(char *, int, FILE*, PyObject *, size_t*);
/* The std printer acts as a preliminary sys.stderr until the new io
infrastructure is in place. */
diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py
index b4ec1fe..7d9d0c4 100644
--- a/Lib/test/test_ast.py
+++ b/Lib/test/test_ast.py
@@ -857,6 +857,10 @@ class AST_Tests(unittest.TestCase):
check_limit("a", "[0]")
check_limit("a", "*a")
+ def test_null_bytes(self):  # ast.parse() on a str with an embedded NUL must raise SyntaxError
+ with self.assertRaises(SyntaxError,
+ msg="source code string cannot contain null bytes"):  # NOTE(review): msg= only sets the failure message; it is not matched against the exception text (assertRaisesRegex would do that)
+ ast.parse("a\0b")
class ASTHelpers_Test(unittest.TestCase):
maxDiff = None
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
index 0f6d2db..9078c40 100644
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -334,11 +334,10 @@ class BuiltinTest(unittest.TestCase):
self.assertRaises(TypeError, compile)
self.assertRaises(ValueError, compile, 'print(42)\n', '<string>', 'badmode')
self.assertRaises(ValueError, compile, 'print(42)\n', '<string>', 'single', 0xff)
- self.assertRaises(ValueError, compile, chr(0), 'f', 'exec')
self.assertRaises(TypeError, compile, 'pass', '?', 'exec',
mode='eval', source='0', filename='tmp')
compile('print("\xe5")\n', '', 'exec')
- self.assertRaises(ValueError, compile, chr(0), 'f', 'exec')
+ self.assertRaises(SyntaxError, compile, chr(0), 'f', 'exec')
self.assertRaises(ValueError, compile, str('a = 1'), 'f', 'bad')
# test the optimize argument
diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py
index 4dadbc0..d100127 100644
--- a/Lib/test/test_cmd_line_script.py
+++ b/Lib/test/test_cmd_line_script.py
@@ -657,6 +657,31 @@ class CmdLineTest(unittest.TestCase):
],
)
+ def test_syntaxerror_null_bytes(self):  # running a script file that contains a NUL byte must fail with SyntaxError
+ script = "x = '\0' nothing to see here\n';import os;os.system('echo pwnd')\n"  # first line has a NUL byte right after the opening quote
+ with os_helper.temp_dir() as script_dir:
+ script_name = _make_test_script(script_dir, 'script', script)
+ exitcode, stdout, stderr = assert_python_failure(script_name)
+ self.assertEqual(
+ stderr.splitlines()[-2:],  # last two stderr lines: source echoed up to the NUL, then the error
+ [ b" x = '",
+ b'SyntaxError: source code cannot contain null bytes'
+ ],
+ )
+
+ def test_syntaxerror_null_bytes_in_multiline_string(self):  # same check with the NUL inside a triple-quoted string
+ scripts = ["\n'''\nmultilinestring\0\n'''", "\nf'''\nmultilinestring\0\n'''"] # Both normal and f-strings
+ with os_helper.temp_dir() as script_dir:
+ for script in scripts:
+ script_name = _make_test_script(script_dir, 'script', script)
+ _, _, stderr = assert_python_failure(script_name)
+ self.assertEqual(
+ stderr.splitlines()[-2:],  # last two stderr lines: the line holding the NUL (shown up to it), then the error
+ [ b" multilinestring",
+ b'SyntaxError: source code cannot contain null bytes'
+ ]
+ )
+
def test_consistent_sys_path_for_direct_execution(self):
# This test case ensures that the following all give the same
# sys.path configuration:
diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py
index 54e9066..c96ae43 100644
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -542,7 +542,7 @@ if 1:
with open(fn, "wb") as fp:
fp.write(src)
res = script_helper.run_python_until_end(fn)[0]
- self.assertIn(b"Non-UTF-8", res.err)
+ self.assertIn(b"source code cannot contain null bytes", res.err)
def test_yet_more_evil_still_undecodable(self):
# Issue #25388
@@ -552,7 +552,7 @@ if 1:
with open(fn, "wb") as fp:
fp.write(src)
res = script_helper.run_python_until_end(fn)[0]
- self.assertIn(b"Non-UTF-8", res.err)
+ self.assertIn(b"source code cannot contain null bytes", res.err)
@support.cpython_only
def test_compiler_recursion_limit(self):
@@ -588,9 +588,9 @@ if 1:
def test_null_terminated(self):
# The source code is null-terminated internally, but bytes-like
# objects are accepted, which could be not terminated.
- with self.assertRaisesRegex(ValueError, "cannot contain null"):
+ with self.assertRaisesRegex(SyntaxError, "cannot contain null"):
compile("123\x00", "<dummy>", "eval")
- with self.assertRaisesRegex(ValueError, "cannot contain null"):
+ with self.assertRaisesRegex(SyntaxError, "cannot contain null"):
compile(memoryview(b"123\x00"), "<dummy>", "eval")
code = compile(memoryview(b"123\x00")[1:-1], "<dummy>", "eval")
self.assertEqual(eval(code), 23)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-09-27-11-59-13.gh-issue-96670.XrBBit.rst b/Misc/NEWS.d/next/Core and Builtins/2022-09-27-11-59-13.gh-issue-96670.XrBBit.rst
new file mode 100644
index 0000000..eea90e7
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-09-27-11-59-13.gh-issue-96670.XrBBit.rst
@@ -0,0 +1,2 @@
+The parser now raises :exc:`SyntaxError` when parsing source code containing
+null bytes. Backported from ``aab01e3``. Patch by Pablo Galindo.
diff --git a/Objects/fileobject.c b/Objects/fileobject.c
index 8dba5b9..ffe55eb 100644
--- a/Objects/fileobject.c
+++ b/Objects/fileobject.c
@@ -230,16 +230,8 @@ _PyLong_FileDescriptor_Converter(PyObject *o, void *ptr)
return 1;
}
-/*
-** Py_UniversalNewlineFgets is an fgets variation that understands
-** all of \r, \n and \r\n conventions.
-** The stream should be opened in binary mode.
-** The fobj parameter exists solely for legacy reasons and must be NULL.
-** Note that we need no error handling: fgets() treats error and eof
-** identically.
-*/
char *
-Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
+_Py_UniversalNewlineFgetsWithSize(char *buf, int n, FILE *stream, PyObject *fobj, size_t* size)
{
char *p = buf;
int c;
@@ -265,11 +257,28 @@ Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
}
FUNLOCKFILE(stream);
*p = '\0';
- if (p == buf)
+ if (p == buf) {
return NULL;
+ }
+ *size = p - buf;
return buf;
}
+/*
+** Py_UniversalNewlineFgets is an fgets variation that understands
+** all of \r, \n and \r\n conventions.
+** The stream should be opened in binary mode.
+** The fobj parameter exists solely for legacy reasons and must be NULL.
+** Note that we need no error handling: fgets() treats error and eof
+** identically.
+*/
+
+char *  /* Public entry point; forwards to _Py_UniversalNewlineFgetsWithSize and returns its result. */
+Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj) {
+ size_t size;  /* out-parameter required by the callee; its value is not read here */
+ return _Py_UniversalNewlineFgetsWithSize(buf, n, stream, fobj, &size);
+}
+
/* **************************** std printer ****************************
* The stdprinter is used during the boot strapping phase as a preliminary
* file like object for sys.stderr.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index ca11c7b..b552b41 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -376,6 +376,11 @@ tok_reserve_buf(struct tok_state *tok, Py_ssize_t size)
return 1;
}
+static inline int  /* Returns nonzero iff a '\0' byte occurs within the first `size` bytes of `str`. */
+contains_null_bytes(const char* str, size_t size) {
+ return memchr(str, 0, size) != NULL;  /* memchr scans by length, so it does not stop at an embedded NUL */
+}
+
static int
tok_readline_recode(struct tok_state *tok) {
PyObject *line;
@@ -831,9 +836,9 @@ tok_readline_raw(struct tok_state *tok)
if (!tok_reserve_buf(tok, BUFSIZ)) {
return 0;
}
- char *line = Py_UniversalNewlineFgets(tok->inp,
- (int)(tok->end - tok->inp),
- tok->fp, NULL);
+ int n_chars = (int)(tok->end - tok->inp);
+ size_t line_size = 0;
+ char *line = _Py_UniversalNewlineFgetsWithSize(tok->inp, n_chars, tok->fp, NULL, &line_size);
if (line == NULL) {
return 1;
}
@@ -841,7 +846,7 @@ tok_readline_raw(struct tok_state *tok)
tok_concatenate_interactive_new_line(tok, line) == -1) {
return 0;
}
- tok->inp = strchr(tok->inp, '\0');
+ tok->inp += line_size;
if (tok->inp == tok->buf) {
return 0;
}
@@ -1078,6 +1083,12 @@ tok_nextc(struct tok_state *tok)
return EOF;
}
tok->line_start = tok->cur;
+
+ if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
+ syntaxerror(tok, "source code cannot contain null bytes");
+ tok->cur = tok->inp;
+ return EOF;
+ }
}
Py_UNREACHABLE();
}
@@ -1987,8 +1998,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
/* Get rest of string */
while (end_quote_size != quote_size) {
c = tok_nextc(tok);
- if (tok->done == E_DECODE)
+ if (tok->done == E_ERROR) {
+ return ERRORTOKEN;
+ }
+ if (tok->done == E_DECODE) {
break;
+ }
if (c == EOF || (quote_size == 1 && c == '\n')) {
assert(tok->multi_line_start != NULL);
// shift the tok_state's location into
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index f12b9f6..efa22b0 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -1859,7 +1859,7 @@ _Py_SourceAsString(PyObject *cmd, const char *funcname, const char *what, PyComp
}
if (strlen(str) != (size_t)size) {
- PyErr_SetString(PyExc_ValueError,
+ PyErr_SetString(PyExc_SyntaxError,
"source code string cannot contain null bytes");
Py_CLEAR(*cmd_copy);
return NULL;