gh-112943: Correctly compute end offsets for multiline tokens in the tokenize module (#112949)

author: Pablo Galindo Salgado <Pablogsal@gmail.com> 2023-12-11 11:44:22 (GMT)
committer: GitHub <noreply@github.com> 2023-12-11 11:44:22 (GMT)
commit: a135a6d2c6d503b186695f01efa7eed65611b04e (patch)
tree: 29f178c34c1e763f84ecb3b63be7c5024c4f51e2 /Parser
parent: 4c5b9c107a1d158b245f21a1839a2bec97d05383 (diff)
download: cpython-a135a6d2c6d503b186695f01efa7eed65611b04e.zip
cpython-a135a6d2c6d503b186695f01efa7eed65611b04e.tar.gz
cpython-a135a6d2c6d503b186695f01efa7eed65611b04e.tar.bz2
2 files changed, 12 insertions, 5 deletions
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 0c60394..7766253 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -19,12 +19,8 @@ _PyPegen_interactive_exit(Parser *p)
 }
 
 Py_ssize_t
-_PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
+_PyPegen_byte_offset_to_character_offset_raw(const char* str, Py_ssize_t col_offset)
 {
-    const char *str = PyUnicode_AsUTF8(line);
-    if (!str) {
-        return -1;
-    }
     Py_ssize_t len = strlen(str);
     if (col_offset > len + 1) {
         col_offset = len + 1;
@@ -39,6 +35,16 @@ _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
     return size;
 }
 
+Py_ssize_t
+_PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
+{
+    const char *str = PyUnicode_AsUTF8(line);
+    if (!str) {
+        return -1;
+    }
+    return _PyPegen_byte_offset_to_character_offset_raw(str, col_offset);
+}
+
 // Here, mark is the start of the node, while p->mark is the end.
 // If node==NULL, they should be the same.
 int
diff --git a/Parser/pegen.h b/Parser/pegen.h
index 424f80a..57b45a5 100644
--- a/Parser/pegen.h
+++ b/Parser/pegen.h
@@ -149,6 +149,7 @@ expr_ty _PyPegen_name_token(Parser *p);
 expr_ty _PyPegen_number_token(Parser *p);
 void *_PyPegen_string_token(Parser *p);
 Py_ssize_t _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset);
+Py_ssize_t _PyPegen_byte_offset_to_character_offset_raw(const char*, Py_ssize_t col_offset);
 
 // Error handling functions and APIs
 typedef enum {
author	Pablo Galindo Salgado <Pablogsal@gmail.com>	2023-12-11 11:44:22 (GMT)
committer	GitHub <noreply@github.com>	2023-12-11 11:44:22 (GMT)
commit	a135a6d2c6d503b186695f01efa7eed65611b04e (patch)
tree	29f178c34c1e763f84ecb3b63be7c5024c4f51e2 /Parser
parent	4c5b9c107a1d158b245f21a1839a2bec97d05383 (diff)
download	cpython-a135a6d2c6d503b186695f01efa7eed65611b04e.zip cpython-a135a6d2c6d503b186695f01efa7eed65611b04e.tar.gz cpython-a135a6d2c6d503b186695f01efa7eed65611b04e.tar.bz2