summary refs log tree commit diff stats
path: root/Parser
diff options
context:
space:
mode:
author Pablo Galindo Salgado <Pablogsal@gmail.com> 2023-12-11 11:44:22 (GMT)
committer GitHub <noreply@github.com> 2023-12-11 11:44:22 (GMT)
commit a135a6d2c6d503b186695f01efa7eed65611b04e (patch)
tree 29f178c34c1e763f84ecb3b63be7c5024c4f51e2 /Parser
parent 4c5b9c107a1d158b245f21a1839a2bec97d05383 (diff)
download cpython-a135a6d2c6d503b186695f01efa7eed65611b04e.zip
cpython-a135a6d2c6d503b186695f01efa7eed65611b04e.tar.gz
cpython-a135a6d2c6d503b186695f01efa7eed65611b04e.tar.bz2
gh-112943: Correctly compute end offsets for multiline tokens in the tokenize module (#112949)
Diffstat (limited to 'Parser')
-rw-r--r-- Parser/pegen.c 16
-rw-r--r-- Parser/pegen.h 1
2 files changed, 12 insertions, 5 deletions
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 0c60394..7766253 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -19,12 +19,8 @@ _PyPegen_interactive_exit(Parser *p)
}
Py_ssize_t
-_PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
+_PyPegen_byte_offset_to_character_offset_raw(const char* str, Py_ssize_t col_offset)
{
- const char *str = PyUnicode_AsUTF8(line);
- if (!str) {
- return -1;
- }
Py_ssize_t len = strlen(str);
if (col_offset > len + 1) {
col_offset = len + 1;
@@ -39,6 +35,16 @@ _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
return size;
}
+Py_ssize_t
+_PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
+{
+ const char *str = PyUnicode_AsUTF8(line);
+ if (!str) {
+ return -1;
+ }
+ return _PyPegen_byte_offset_to_character_offset_raw(str, col_offset);
+}
+
// Here, mark is the start of the node, while p->mark is the end.
// If node==NULL, they should be the same.
int
diff --git a/Parser/pegen.h b/Parser/pegen.h
index 424f80a..57b45a5 100644
--- a/Parser/pegen.h
+++ b/Parser/pegen.h
@@ -149,6 +149,7 @@ expr_ty _PyPegen_name_token(Parser *p);
expr_ty _PyPegen_number_token(Parser *p);
void *_PyPegen_string_token(Parser *p);
Py_ssize_t _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset);
+Py_ssize_t _PyPegen_byte_offset_to_character_offset_raw(const char*, Py_ssize_t col_offset);
// Error handling functions and APIs
typedef enum {