[3.12] gh-112387: Fix error positions for decoded strings with backwards tokenize errors (GH-112409) (#112468)

gh-112387: Fix error positions for decoded strings with backwards tokenize errors (GH-112409) (cherry picked from commit 45d648597b1146431bf3d91041e60d7f040e70bf) Signed-off-by: Pablo Galindo <pablogsal@gmail.com> Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> 2023-11-27 18:57:34 (GMT)
committer: GitHub <noreply@github.com> 2023-11-27 18:57:34 (GMT)
commit: 7140716c883d8f40716543e827fa396609f39962 (patch)
tree: 43cb67a31f3df4c667436797416ea844108f3f69
parent: 4463d2e01886824ccc7857205f6acb9a3d01d0ab (diff)
download: cpython-7140716c883d8f40716543e827fa396609f39962.zip
cpython-7140716c883d8f40716543e827fa396609f39962.tar.gz
cpython-7140716c883d8f40716543e827fa396609f39962.tar.bz2
3 files changed, 10 insertions, 0 deletions
diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py
index 00c5f62..5183aaa 100644
--- a/Lib/test/test_syntax.py
+++ b/Lib/test/test_syntax.py
@@ -2296,6 +2296,10 @@ func(
 """
         self._check_error(code, "parenthesis '\\)' does not match opening parenthesis '\\['")
 
+        # Examples with encodings
+        s = b'# coding=latin\n(aaaaaaaaaaaaaaaaa\naaaaaaaaaaa\xb5'
+        self._check_error(s, "'\(' was never closed")
+
     def test_error_string_literal(self):
 
         self._check_error("'blech", "unterminated string literal")
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-11-25-22-39-44.gh-issue-112387.AbBq5W.rst b/Misc/NEWS.d/next/Core and Builtins/2023-11-25-22-39-44.gh-issue-112387.AbBq5W.rst
new file mode 100644
index 0000000..adac11b
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-11-25-22-39-44.gh-issue-112387.AbBq5W.rst
@@ -0,0 +1,2 @@
+Fix error positions for decoded strings with backwards tokenize errors.
+Patch by Pablo Galindo.
diff --git a/Parser/pegen_errors.c b/Parser/pegen_errors.c
index 6390a66..e832422 100644
--- a/Parser/pegen_errors.c
+++ b/Parser/pegen_errors.c
@@ -276,6 +276,10 @@ get_error_line_from_tokenizer_buffers(Parser *p, Py_ssize_t lineno)
     Py_ssize_t relative_lineno = p->starting_lineno ? lineno - p->starting_lineno + 1 : lineno;
     const char* buf_end = p->tok->fp_interactive ? p->tok->interactive_src_end : p->tok->inp;
 
+    if (buf_end < cur_line) {
+        buf_end = cur_line + strlen(cur_line);
+    }
+
     for (int i = 0; i < relative_lineno - 1; i++) {
         char *new_line = strchr(cur_line, '\n');
         // The assert is here for debug builds but the conditional that
author	Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>	2023-11-27 18:57:34 (GMT)
committer	GitHub <noreply@github.com>	2023-11-27 18:57:34 (GMT)
commit	7140716c883d8f40716543e827fa396609f39962 (patch)
tree	43cb67a31f3df4c667436797416ea844108f3f69
parent	4463d2e01886824ccc7857205f6acb9a3d01d0ab (diff)
download	cpython-7140716c883d8f40716543e827fa396609f39962.zip cpython-7140716c883d8f40716543e827fa396609f39962.tar.gz cpython-7140716c883d8f40716543e827fa396609f39962.tar.bz2