summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Lib/test/test_codeop.py1
-rw-r--r--Lib/test/test_grammar.py2
-rw-r--r--Lib/test/test_pdb.py4
-rw-r--r--Lib/test/test_syntax.py8
-rw-r--r--Misc/NEWS.d/next/Core and Builtins/2021-01-14-23-15-34.bpo-42864.QgOAQ1.rst2
-rw-r--r--Parser/pegen.c73
-rw-r--r--Parser/tokenizer.c5
-rw-r--r--Parser/tokenizer.h1
8 files changed, 88 insertions, 8 deletions
diff --git a/Lib/test/test_codeop.py b/Lib/test/test_codeop.py
index 45d0a7d..1da6ca5 100644
--- a/Lib/test/test_codeop.py
+++ b/Lib/test/test_codeop.py
@@ -160,7 +160,6 @@ class CodeopTests(unittest.TestCase):
ai("","eval")
ai("\n","eval")
ai("(","eval")
- ai("(\n\n\n","eval")
ai("(9+","eval")
ai("9+ \\","eval")
ai("lambda z: \\","eval")
diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py
index 2f6716d..0be869e 100644
--- a/Lib/test/test_grammar.py
+++ b/Lib/test/test_grammar.py
@@ -260,7 +260,7 @@ the \'lazy\' dog.\n\
for s in samples:
with self.assertRaises(SyntaxError) as cm:
compile(s, "<test>", "exec")
- self.assertIn("unexpected EOF", str(cm.exception))
+ self.assertIn("was never closed", str(cm.exception))
var_annot_global: int # a global annotated is necessary for test_var_annot
diff --git a/Lib/test/test_pdb.py b/Lib/test/test_pdb.py
index 4bb574f..93b61dc 100644
--- a/Lib/test/test_pdb.py
+++ b/Lib/test/test_pdb.py
@@ -1649,10 +1649,10 @@ def bœr():
self.assertEqual(stdout.splitlines()[1:], [
'-> pass',
- '(Pdb) *** SyntaxError: unexpected EOF while parsing',
+ '(Pdb) *** SyntaxError: \'(\' was never closed',
'(Pdb) ENTERING RECURSIVE DEBUGGER',
- '*** SyntaxError: unexpected EOF while parsing',
+ '*** SyntaxError: \'(\' was never closed',
'LEAVING RECURSIVE DEBUGGER',
'(Pdb) ENTERING RECURSIVE DEBUGGER',
diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py
index d825560..c8d191d 100644
--- a/Lib/test/test_syntax.py
+++ b/Lib/test/test_syntax.py
@@ -987,6 +987,14 @@ def func2():
self._check_error("A.\u03bc\\\n",
"unexpected EOF while parsing")
+ def test_error_parenthesis(self):
+ for paren in "([{":
+ self._check_error(paren + "1 + 2", f"\\{paren}' was never closed")
+
+ for paren in ")]}":
+ self._check_error(paren + "1 + 2", f"unmatched '\\{paren}'")
+
+
def test_main():
support.run_unittest(SyntaxTestCase)
from test import test_syntax
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-01-14-23-15-34.bpo-42864.QgOAQ1.rst b/Misc/NEWS.d/next/Core and Builtins/2021-01-14-23-15-34.bpo-42864.QgOAQ1.rst
new file mode 100644
index 0000000..127a29f
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-01-14-23-15-34.bpo-42864.QgOAQ1.rst
@@ -0,0 +1,2 @@
+Improve error messages in the parser when parentheses are not closed. Patch
+by Pablo Galindo.
diff --git a/Parser/pegen.c b/Parser/pegen.c
index a6f9792..6c27980 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -265,6 +265,16 @@ raise_decode_error(Parser *p)
return -1;
}
+static inline void
+raise_unclosed_parentheses_error(Parser *p) {
+ int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
+ int error_col = p->tok->parencolstack[p->tok->level-1];
+ RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError,
+ error_lineno, error_col,
+ "'%c' was never closed",
+ p->tok->parenstack[p->tok->level-1]);
+}
+
static void
raise_tokenizer_init_error(PyObject *filename)
{
@@ -324,7 +334,11 @@ tokenizer_error(Parser *p)
RAISE_SYNTAX_ERROR("EOL while scanning string literal");
return -1;
case E_EOF:
- RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
+ if (p->tok->level) {
+ raise_unclosed_parentheses_error(p);
+ } else {
+ RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
+ }
return -1;
case E_DEDENT:
RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level");
@@ -1151,6 +1165,52 @@ reset_parser_state(Parser *p)
p->call_invalid_rules = 1;
}
+static int
+_PyPegen_check_tokenizer_errors(Parser *p) {
+ // Tokenize the whole input to see if there are any tokenization
+ // errors such as mismatching parentheses. These will get priority
+ // over generic syntax errors only if the line number of the error is
+ // before the one that we had for the generic error.
+
+ // We don't want to tokenize to the end for interactive input
+ if (p->tok->prompt != NULL) {
+ return 0;
+ }
+
+
+ Token *current_token = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
+ Py_ssize_t current_err_line = current_token->lineno;
+
+ // Save the tokenizer state to restore it later in case we find nothing
+ struct tok_state saved_tok;
+ memcpy(&saved_tok, p->tok, sizeof(struct tok_state));
+
+ for (;;) {
+ const char *start;
+ const char *end;
+ switch (PyTokenizer_Get(p->tok, &start, &end)) {
+ case ERRORTOKEN:
+ if (p->tok->level != 0) {
+ int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
+ if (current_err_line > error_lineno) {
+ raise_unclosed_parentheses_error(p);
+ return -1;
+ }
+ }
+ break;
+ case ENDMARKER:
+ break;
+ default:
+ continue;
+ }
+ break;
+ }
+
+ // Restore the tokenizer state
+ memcpy(p->tok, &saved_tok, sizeof(struct tok_state));
+ return 0;
+}
+
void *
_PyPegen_run_parser(Parser *p)
{
@@ -1164,8 +1224,12 @@ _PyPegen_run_parser(Parser *p)
if (p->fill == 0) {
RAISE_SYNTAX_ERROR("error at start before reading any input");
}
- else if (p->tok->done == E_EOF) {
- RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
+ else if (p->tok->done == E_EOF) {
+ if (p->tok->level) {
+ raise_unclosed_parentheses_error(p);
+ } else {
+ RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
+ }
}
else {
if (p->tokens[p->fill-1]->type == INDENT) {
@@ -1175,6 +1239,9 @@ _PyPegen_run_parser(Parser *p)
RAISE_INDENTATION_ERROR("unexpected unindent");
}
else {
+ if (_PyPegen_check_tokenizer_errors(p)) {
+ return NULL;
+ }
RAISE_SYNTAX_ERROR("invalid syntax");
}
}
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 62cd296..f9c8bf6 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -64,7 +64,6 @@ tok_new(void)
tok->tabsize = TABSIZE;
tok->indent = 0;
tok->indstack[0] = 0;
-
tok->atbol = 1;
tok->pendin = 0;
tok->prompt = tok->nextprompt = NULL;
@@ -1396,6 +1395,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
/* Check for EOF and errors now */
if (c == EOF) {
+ if (tok->level) {
+ return ERRORTOKEN;
+ }
return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
}
@@ -1818,6 +1820,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
}
tok->parenstack[tok->level] = c;
tok->parenlinenostack[tok->level] = tok->lineno;
+ tok->parencolstack[tok->level] = tok->start - tok->line_start;
tok->level++;
break;
case ')':
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index b659f34..56074b6 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -45,6 +45,7 @@ struct tok_state {
/* Used to allow free continuations inside them */
char parenstack[MAXLEVEL];
int parenlinenostack[MAXLEVEL];
+ int parencolstack[MAXLEVEL];
PyObject *filename;
/* Stuff for checking on different tab sizes */
int altindstack[MAXINDENT]; /* Stack of alternate indents */