author    | Pablo Galindo Salgado <Pablogsal@gmail.com>      | 2022-07-05 18:14:28 (GMT)
committer | GitHub <noreply@github.com>                      | 2022-07-05 18:14:28 (GMT)
commit    | 697e78ca05ee989c383996a7fde2277fb2d2886f (patch)
tree      | 30ad04672a7505dd6d51fd47c2551fa024f46a3c
parent    | 922075c964e5d630402176169980b8831c42409c (diff)
[3.10] gh-94360: Fix a tokenizer crash when reading encoded files with syntax errors from stdin (GH-94386) (GH-94574)
Signed-off-by: Pablo Galindo <pablogsal@gmail.com>
Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
Co-authored-by: Łukasz Langa <lukasz@langa.pl>
(cherry picked from commit 36fcde61ba48c4e918830691ecf4092e4e3b9b99)
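
The Parser/pegen.c hunk in the diff below is the core of the fix: get_error_line() used to compute strchr(cur_line, '\n') + 1 before checking the result, so when the offending line was the last one in the buffer and had no trailing newline, the code could advance from a NULL pointer while formatting the SyntaxError. The following is a minimal standalone sketch of the corrected pattern; find_line_start() and the sample strings are invented for illustration and are not CPython's code.

#include <stdio.h>
#include <string.h>

/* Walk to the start of line `lineno` (1-based) inside `buf`, or return NULL
 * if the buffer runs out of newlines first.  Mirrors the safe pattern the
 * patch switches to: test strchr()'s result *before* stepping past the '\n'. */
static const char *
find_line_start(const char *buf, const char *buf_end, int lineno)
{
    const char *cur_line = buf;
    for (int i = 0; i < lineno - 1; i++) {
        const char *new_line = strchr(cur_line, '\n');
        if (new_line == NULL || new_line + 1 >= buf_end) {
            return NULL;              /* no further complete line */
        }
        cur_line = new_line + 1;      /* advance only after the check */
    }
    return cur_line;
}

int main(void)
{
    const char *src = "line one\nline two\nline three\n";
    const char *end = src + strlen(src);

    printf("line 2 starts at: %.8s\n", find_line_start(src, end, 2));

    /* Asking for a line that does not exist now fails cleanly instead of
     * evaluating strchr(...) + 1 on a NULL pointer. */
    printf("line 9: %s\n", find_line_start(src, end, 9) ? "found" : "not found");
    return 0;
}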
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2022-06-28-14-20-36.gh-issue-94360.DiEnen.rst |  2
-rw-r--r-- | Parser/pegen.c                                                                   |  8
-rw-r--r-- | Parser/tokenizer.c                                                               | 10
3 files changed, 15 insertions(+), 5 deletions(-)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-06-28-14-20-36.gh-issue-94360.DiEnen.rst b/Misc/NEWS.d/next/Core and Builtins/2022-06-28-14-20-36.gh-issue-94360.DiEnen.rst
new file mode 100644
index 0000000..0a74ba3
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-06-28-14-20-36.gh-issue-94360.DiEnen.rst
@@ -0,0 +1,2 @@
+Fixed a tokenizer crash when reading encoded files with syntax errors from
+``stdin`` with non utf-8 encoded text. Patch by Pablo Galindo
diff --git a/Parser/pegen.c b/Parser/pegen.c
index c048243..414059d 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -446,12 +446,12 @@ get_error_line(Parser *p, Py_ssize_t lineno)
     Py_ssize_t relative_lineno = p->starting_lineno ? lineno - p->starting_lineno + 1 : lineno;
 
     for (int i = 0; i < relative_lineno - 1; i++) {
-        char *new_line = strchr(cur_line, '\n') + 1;
-        assert(new_line != NULL && new_line <= buf_end);
-        if (new_line == NULL || new_line > buf_end) {
+        char *new_line = strchr(cur_line, '\n');
+        assert(new_line != NULL && new_line + 1 < buf_end);
+        if (new_line == NULL || new_line + 1 > buf_end) {
             break;
         }
-        cur_line = new_line;
+        cur_line = new_line + 1;
     }
 
     char *next_newline;
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 267ccec..579474c 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -328,6 +328,10 @@ tok_concatenate_interactive_new_line(struct tok_state *tok, const char *line) {
 
     Py_ssize_t current_size = tok->interactive_src_end - tok->interactive_src_start;
     Py_ssize_t line_size = strlen(line);
+    char last_char = line[line_size > 0 ? line_size - 1 : line_size];
+    if (last_char != '\n') {
+        line_size += 1;
+    }
     char* new_str = tok->interactive_src_start;
 
     new_str = PyMem_Realloc(new_str, current_size + line_size + 1);
@@ -341,7 +345,11 @@ tok_concatenate_interactive_new_line(struct tok_state *tok, const char *line) {
         return -1;
     }
     strcpy(new_str + current_size, line);
-
+    if (last_char != '\n') {
+        /* Last line does not end in \n, fake one */
+        new_str[current_size + line_size - 1] = '\n';
+        new_str[current_size + line_size] = '\0';
+    }
     tok->interactive_src_start = new_str;
     tok->interactive_src_end = new_str + current_size + line_size;
     return 0;
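
The Parser/tokenizer.c hunks attack the same problem from the other side: when the last interactive line does not end in '\n', tok_concatenate_interactive_new_line() now grows the buffer by one byte and fakes the missing newline, so code reading the stored source can rely on every line being newline-terminated. Below is a hedged, self-contained sketch of that idea; append_line() and its realloc-based buffer are invented for illustration and do not reproduce CPython's PyMem-based helper.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Append `line` to a growable, NUL-terminated buffer, faking a trailing '\n'
 * when the line lacks one.  Mirrors the idea of the tokenizer.c change, not
 * its exact code. */
static char *
append_line(char *buf, size_t *len, const char *line)
{
    size_t line_size = strlen(line);
    int needs_newline = (line_size == 0 || line[line_size - 1] != '\n');
    size_t extra = line_size + (needs_newline ? 1 : 0);

    char *new_buf = realloc(buf, *len + extra + 1);
    if (new_buf == NULL) {
        free(buf);
        return NULL;
    }
    memcpy(new_buf + *len, line, line_size);
    if (needs_newline) {
        new_buf[*len + line_size] = '\n';   /* fake the missing newline */
    }
    new_buf[*len + extra] = '\0';
    *len += extra;
    return new_buf;
}

int main(void)
{
    size_t len = 0;
    char *buf = append_line(NULL, &len, "x = 1\n");
    if (buf != NULL) {
        buf = append_line(buf, &len, "y = (");   /* last line, no '\n' */
    }
    if (buf != NULL) {
        printf("%s", buf);   /* both lines come out newline-terminated */
        free(buf);
    }
    return 0;
}

Taken together, the patch hardens both sides of the interaction: the producer keeps the interactive buffer newline-terminated, and the error-reporting consumer no longer steps past an unchecked strchr() result, so either fix alone would already avoid the crash.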