summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--Lib/test/test_tokenize.py11
-rw-r--r--Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst1
-rw-r--r--Python/Python-tokenize.c7
3 files changed, 18 insertions, 1 deletions
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 4428e8c..36dba71 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1199,6 +1199,17 @@ async def f():
NAME 'x' (1, 3) (1, 4)
""")
+ def test_multiline_non_ascii_fstring(self):
+ self.check_tokenize("""\
+a = f'''
+ Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli'''""", """\
+ NAME 'a' (1, 0) (1, 1)
+ OP '=' (1, 2) (1, 3)
+ FSTRING_START "f\'\'\'" (1, 4) (1, 8)
+ FSTRING_MIDDLE '\\n Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli' (1, 8) (2, 68)
+ FSTRING_END "\'\'\'" (2, 68) (2, 71)
+ """)
+
class GenerateTokensTest(TokenizeTest):
def check_tokenize(self, s, expected):
# Format the tokens in s in a table format.
diff --git a/Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst b/Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst
new file mode 100644
index 0000000..76714b0
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst
@@ -0,0 +1 @@
+Fix column offset reporting for tokens that come after multiline f-strings in the :mod:`tokenize` module.
diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c
index 09fad18..2591dae 100644
--- a/Python/Python-tokenize.c
+++ b/Python/Python-tokenize.c
@@ -36,6 +36,7 @@ typedef struct
/* Needed to cache line for performance */
PyObject *last_line;
Py_ssize_t last_lineno;
+ Py_ssize_t last_end_lineno;
Py_ssize_t byte_col_offset_diff;
} tokenizeriterobject;
@@ -77,6 +78,7 @@ tokenizeriter_new_impl(PyTypeObject *type, PyObject *readline,
self->last_line = NULL;
self->byte_col_offset_diff = 0;
self->last_lineno = 0;
+ self->last_end_lineno = 0;
return (PyObject *)self;
}
@@ -227,7 +229,9 @@ tokenizeriter_next(tokenizeriterobject *it)
Py_XDECREF(it->last_line);
line = PyUnicode_DecodeUTF8(line_start, size, "replace");
it->last_line = line;
- it->byte_col_offset_diff = 0;
+ if (it->tok->lineno != it->last_end_lineno) {
+ it->byte_col_offset_diff = 0;
+ }
} else {
// Line hasn't changed so we reuse the cached one.
line = it->last_line;
@@ -241,6 +245,7 @@ tokenizeriter_next(tokenizeriterobject *it)
Py_ssize_t lineno = ISSTRINGLIT(type) ? it->tok->first_lineno : it->tok->lineno;
Py_ssize_t end_lineno = it->tok->lineno;
it->last_lineno = lineno;
+ it->last_end_lineno = end_lineno;
Py_ssize_t col_offset = -1;
Py_ssize_t end_col_offset = -1;