summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLysandros Nikolaou <lisandrosnik@gmail.com>2020-04-29 01:42:27 (GMT)
committerGitHub <noreply@github.com>2020-04-29 01:42:27 (GMT)
commit6d6508765514c7c10719478a0430f5e47c9a96ac (patch)
treecab1123e87fe03272d86dae862cf335d9e709e81
parenta4dfe8ede5a37576e17035dccfe109ba7752237e (diff)
downloadcpython-6d6508765514c7c10719478a0430f5e47c9a96ac.zip
cpython-6d6508765514c7c10719478a0430f5e47c9a96ac.tar.gz
cpython-6d6508765514c7c10719478a0430f5e47c9a96ac.tar.bz2
bpo-40334: Disallow invalid single statements in the new parser (GH-19774)
After parsing is done in single statement mode, the tokenizer buffer has to be checked for additional lines and a `SyntaxError` must be raised, in case there are any. Co-authored-by: Pablo Galindo <Pablogsal@gmail.com>
-rw-r--r--Lib/test/test_compile.py1
-rw-r--r--Parser/pegen/pegen.c51
2 files changed, 51 insertions, 1 deletions
diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py
index a507ac0..566ca27 100644
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -501,7 +501,6 @@ if 1:
self.compile_single("if x:\n f(x)\nelse:\n g(x)")
self.compile_single("class T:\n pass")
- @support.skip_if_new_parser('Pegen does not disallow multiline single stmts')
def test_bad_single_statement(self):
self.assertInvalidSingle('1\n2')
self.assertInvalidSingle('def f(): pass')
diff --git a/Parser/pegen/pegen.c b/Parser/pegen/pegen.c
index ef95aac..39da270 100644
--- a/Parser/pegen/pegen.c
+++ b/Parser/pegen/pegen.c
@@ -911,6 +911,52 @@ _PyPegen_number_token(Parser *p)
p->arena);
}
+/* Return 1 if a single-quote or double-quote character occurs anywhere
+   between the start of the tokenizer buffer (p->tok->buf) and cur,
+   inclusive; return 0 otherwise.
+
+   This is a coarse check: it only looks for the presence of a quote
+   character and does not verify that a string is actually still open
+   at cur.
+
+   cur is expected to point at a byte inside the buffer (the caller passes
+   the position of a newline found with strchr). */
+static int // bool
+newline_in_string(Parser *p, const char *cur)
+{
+    const char *start = p->tok->buf;
+
+    /* Walk backwards from cur.  The pointer is compared against the start
+       of the buffer *before* it is decremented and dereferenced, so the
+       byte preceding the buffer is never read. */
+    const char *c = cur + 1;
+    while (c > start) {
+        --c;
+        if (*c == '\'' || *c == '"') {
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/* Check that the source for a single input statement really is a single
+   statement by looking at what is left in the buffer after parsing.
+   Trailing whitespace and comments are OK.
+
+   The scan starts at the first newline found in p->tok->buf.  Returns 1
+   if anything other than whitespace or comments follows that newline,
+   and 0 otherwise. */
+static int // bool
+bad_single_statement(Parser *p)
+{
+    const char *cur = strchr(p->tok->buf, '\n');
+
+    /* No newline at all: nothing can follow the statement. */
+    if (!cur) {
+        return 0;
+    }
+
+    /* Newlines are allowed if preceded by a line continuation character
+       or if they appear inside a string.  The preceding byte is inspected
+       only when the newline is not the very first byte of the buffer, so
+       that we never read before the start of the buffer. */
+    if ((cur > p->tok->buf && cur[-1] == '\\') || newline_in_string(p, cur)) {
+        return 0;
+    }
+    char c = *cur;
+
+    for (;;) {
+        /* Skip blanks, tabs, newlines and form feeds. */
+        while (c == ' ' || c == '\t' || c == '\n' || c == '\014') {
+            c = *++cur;
+        }
+
+        /* Reached the terminating NUL: only whitespace/comments remained. */
+        if (!c) {
+            return 0;
+        }
+
+        /* Anything that is not the start of a comment is extra input. */
+        if (c != '#') {
+            return 1;
+        }
+
+        /* Suck up comment. */
+        while (c && c != '\n') {
+            c = *++cur;
+        }
+    }
+}
+
void
_PyPegen_Parser_Free(Parser *p)
{
@@ -1014,6 +1060,11 @@ _PyPegen_run_parser(Parser *p)
return NULL;
}
+ if (p->start_rule == Py_single_input && bad_single_statement(p)) {
+ p->tok->done = E_BADSINGLE; // This is not necessary for now, but might be in the future
+ return RAISE_SYNTAX_ERROR("multiple statements found while compiling a single statement");
+ }
+
return res;
}