bpo-42827: Fix crash on SyntaxError in multiline expressions (GH-24140)

When trying to extract the error line for the error message, there are two distinct cases:

1. The input comes from a file, which means that we can extract the error line by using `PyErr_ProgramTextObject`, which we already do.
2. The input does not come from a file, at which point we need to get the source code from the tokenizer:
   * If the tokenizer's current line number is the same as the line of the error, we get the line from `tok->buf` and we're done.
   * Otherwise, we can extract the error line from the source code in one of the following two ways:
     * If the input comes from a string, we have all the input in `tok->str` and we can extract the error line from it.
     * If the input comes from stdin, i.e. the interactive prompt, we do not have access to the previous line. That's why a new field `tok->stdin_content` is added, which holds the whole input for the current (multiline) statement or expression. We can then extract the error line from `tok->stdin_content` like we do in the string case above.

Co-authored-by: Pablo Galindo <Pablogsal@gmail.com>
author: Lysandros Nikolaou <lisandrosnik@gmail.com> 2021-01-14 21:36:30 (GMT)
committer: GitHub <noreply@github.com> 2021-01-14 21:36:30 (GMT)
commit: e5fe509054183bed9aef42c92da8407d339e8af8 (patch)
tree: 74174755289b6d7f87fea41612d9882f9f8202ba /Parser/tokenizer.c
parent: 971235827754eee6c0d9f7d39b52fecdfd4cb7b4 (diff)
download: cpython-e5fe509054183bed9aef42c92da8407d339e8af8.zip
cpython-e5fe509054183bed9aef42c92da8407d339e8af8.tar.gz
cpython-e5fe509054183bed9aef42c92da8407d339e8af8.tar.bz2
1 files changed, 21 insertions, 0 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 96539bd..62cd296 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -81,6 +81,7 @@ tok_new(void)
     tok->decoding_readline = NULL;
     tok->decoding_buffer = NULL;
     tok->type_comments = 0;
+    tok->stdin_content = NULL;
 
     tok->async_hacks = 0;
     tok->async_def = 0;
@@ -816,6 +817,8 @@ PyTokenizer_Free(struct tok_state *tok)
         PyMem_Free(tok->buf);
     if (tok->input)
         PyMem_Free(tok->input);
+    if (tok->stdin_content)
+        PyMem_Free(tok->stdin_content);
     PyMem_Free(tok);
 }
 
@@ -856,6 +859,24 @@ tok_nextc(struct tok_state *tok)
                 if (translated == NULL)
                     return EOF;
                 newtok = translated;
+                if (tok->stdin_content == NULL) {
+                    tok->stdin_content = PyMem_Malloc(strlen(translated) + 1);
+                    if (tok->stdin_content == NULL) {
+                        tok->done = E_NOMEM;
+                        return EOF;
+                    }
+                    sprintf(tok->stdin_content, "%s", translated);
+                }
+                else {
+                    char *new_str = PyMem_Malloc(strlen(tok->stdin_content) + strlen(translated) + 1);
+                    if (new_str == NULL) {
+                        tok->done = E_NOMEM;
+                        return EOF;
+                    }
+                    sprintf(new_str, "%s%s", tok->stdin_content, translated);
+                    PyMem_Free(tok->stdin_content);
+                    tok->stdin_content = new_str;
+                }
             }
             if (tok->encoding && newtok && *newtok) {
                 /* Recode to UTF-8 */
author	Lysandros Nikolaou <lisandrosnik@gmail.com>	2021-01-14 21:36:30 (GMT)
committer	GitHub <noreply@github.com>	2021-01-14 21:36:30 (GMT)
commit	e5fe509054183bed9aef42c92da8407d339e8af8 (patch)
tree	74174755289b6d7f87fea41612d9882f9f8202ba /Parser/tokenizer.c
parent	971235827754eee6c0d9f7d39b52fecdfd4cb7b4 (diff)
download	cpython-e5fe509054183bed9aef42c92da8407d339e8af8.zip cpython-e5fe509054183bed9aef42c92da8407d339e8af8.tar.gz cpython-e5fe509054183bed9aef42c92da8407d339e8af8.tar.bz2