bpo-44201: Avoid side effects of "invalid_*" rules in the REPL (GH-26298)

When the parser does a second pass to check for errors, these rules can have some small side effects, as they may advance the parser beyond the point reached in the first pass. This can cause the tokenizer to ask for extra tokens in interactive mode, which makes it show the prompt instead of failing immediately. To avoid this, add a new mode to the tokenizer that is activated in the second pass and stops it from asking for new tokens once the interactive line is finished. As the parsing should already have reached the last line in the first pass, the second pass should not need to ask for more tokens.
author: Pablo Galindo <Pablogsal@gmail.com> 2021-05-22 22:05:00 (GMT)
committer: GitHub <noreply@github.com> 2021-05-22 22:05:00 (GMT)
commit: bd7476dae337e905e7b1bbf33ddb96cc270fdc84 (patch)
tree: 2d2db18ce2b6c7f3c7ee3df6da68523aa812ede2 /Parser
parent: 2a1e6698b10a7b58c6ac66429de0f51cb739da35 (diff)
download: cpython-bd7476dae337e905e7b1bbf33ddb96cc270fdc84.zip
cpython-bd7476dae337e905e7b1bbf33ddb96cc270fdc84.tar.gz
cpython-bd7476dae337e905e7b1bbf33ddb96cc270fdc84.tar.bz2
3 files changed, 22 insertions, 0 deletions
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 3c25e4d..548a647 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -1234,6 +1234,9 @@ reset_parser_state(Parser *p)
     }
     p->mark = 0;
     p->call_invalid_rules = 1;
+    // Don't try to get extra tokens in interactive mode when trying to
+    // raise specialized errors in the second pass.
+    p->tok->interactive_underflow = IUNDERFLOW_STOP;
 }
 
 static int
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index ad32293..a86af9b 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -85,6 +85,7 @@ tok_new(void)
     tok->async_def = 0;
     tok->async_def_indent = 0;
     tok->async_def_nl = 0;
+    tok->interactive_underflow = IUNDERFLOW_NORMAL;
 
     return tok;
 }
@@ -845,6 +846,10 @@ tok_underflow_string(struct tok_state *tok) {
 
 static int
 tok_underflow_interactive(struct tok_state *tok) {
+    if (tok->interactive_underflow == IUNDERFLOW_STOP) {
+        tok->done = E_INTERACT_STOP;
+        return 1;
+    }
     char *newtok = PyOS_Readline(stdin, stdout, tok->prompt);
     if (newtok != NULL) {
         char *translated = translate_newlines(newtok, 0, tok);
@@ -1399,6 +1404,10 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
         }
     }
 
+    if (tok->done == E_INTERACT_STOP) {
+        return ENDMARKER;
+    }
+
     /* Check for EOF and errors now */
     if (c == EOF) {
         if (tok->level) {
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index aaa31f3..ff563d5 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -19,6 +19,14 @@ enum decoding_state {
     STATE_NORMAL
 };
 
+enum interactive_underflow_t {
+    /* Normal mode of operation: return a new token when asked in interactive mode */
+    IUNDERFLOW_NORMAL,
+    /* Forcefully return ENDMARKER when asked for a new token in interactive mode. This
+     * can be used to prevent the tokenizer from prompting the user for new tokens */
+    IUNDERFLOW_STOP,
+};
+
 /* Tokenizer state */
 struct tok_state {
     /* Input state; buf <= cur <= inp <= end */
@@ -74,6 +82,8 @@ struct tok_state {
     int async_def_indent; /* Indentation level of the outermost 'async def'. */
     int async_def_nl;     /* =1 if the outermost 'async def' had at least one
                              NEWLINE token after it. */
+    /* How to proceed when asked for a new token in interactive mode */
+    enum interactive_underflow_t interactive_underflow; 
 };
 
 extern struct tok_state *PyTokenizer_FromString(const char *, int);
author	Pablo Galindo <Pablogsal@gmail.com>	2021-05-22 22:05:00 (GMT)
committer	GitHub <noreply@github.com>	2021-05-22 22:05:00 (GMT)
commit	bd7476dae337e905e7b1bbf33ddb96cc270fdc84 (patch)
tree	2d2db18ce2b6c7f3c7ee3df6da68523aa812ede2 /Parser
parent	2a1e6698b10a7b58c6ac66429de0f51cb739da35 (diff)
download	cpython-bd7476dae337e905e7b1bbf33ddb96cc270fdc84.zip cpython-bd7476dae337e905e7b1bbf33ddb96cc270fdc84.tar.gz cpython-bd7476dae337e905e7b1bbf33ddb96cc270fdc84.tar.bz2