summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Pablo Galindo Salgado <Pablogsal@gmail.com>, 2023-05-24 10:40:51 (GMT)
committer: GitHub <noreply@github.com>, 2023-05-24 10:40:51 (GMT)
commit: 3d2ed8991f9f0f4bbefe4c6f5c8bbbb92259bac6 (patch)
tree: 1e8a1de7639ecf27bc8c0ccf83a502d4d86d932d
parent: 2d685eca8a6ef25963609246d18097032358881c (diff)
download: cpython-3d2ed8991f9f0f4bbefe4c6f5c8bbbb92259bac6.zip
cpython-3d2ed8991f9f0f4bbefe4c6f5c8bbbb92259bac6.tar.gz
cpython-3d2ed8991f9f0f4bbefe4c6f5c8bbbb92259bac6.tar.bz2
[3.12] gh-104825: Remove implicit newline in the line attribute in tokens emitted in the tokenize module (GH-104846). (#104850)
(cherry picked from commit c8cf9b42eb2bfbd4c3e708ec28d32430248a1d7a)
-rw-r--r-- Lib/test/test_tabnanny.py 8
-rw-r--r-- Lib/test/test_tokenize.py 4
-rw-r--r-- Misc/NEWS.d/next/Core and Builtins/2023-05-24-09-59-56.gh-issue-104825.mQesie.rst 2
-rw-r--r-- Python/Python-tokenize.c 4
4 files changed, 12 insertions, 6 deletions
diff --git a/Lib/test/test_tabnanny.py b/Lib/test/test_tabnanny.py
index aa70011..cc122ca 100644
--- a/Lib/test/test_tabnanny.py
+++ b/Lib/test/test_tabnanny.py
@@ -222,7 +222,7 @@ class TestCheck(TestCase):
"""
with TemporaryPyFile(SOURCE_CODES["nannynag_errored"]) as file_path:
out = f"{file_path!r}: *** Line 3: trouble in tab city! ***\n"
- out += "offending line: '\\tprint(\"world\")\\n'\n"
+ out += "offending line: '\\tprint(\"world\")'\n"
out += "inconsistent use of tabs and spaces in indentation\n"
tabnanny.verbose = 1
@@ -231,7 +231,7 @@ class TestCheck(TestCase):
def test_when_nannynag_error(self):
"""A python source code file eligible for raising `tabnanny.NannyNag`."""
with TemporaryPyFile(SOURCE_CODES["nannynag_errored"]) as file_path:
- out = f"{file_path} 3 '\\tprint(\"world\")\\n'\n"
+ out = f"{file_path} 3 '\\tprint(\"world\")'\n"
self.verify_tabnanny_check(file_path, out=out)
def test_when_no_file(self):
@@ -341,7 +341,7 @@ class TestCommandLine(TestCase):
"""Should display more error information if verbose mode is on."""
with TemporaryPyFile(SOURCE_CODES["nannynag_errored"]) as path:
stdout = textwrap.dedent(
- "offending line: '\\tprint(\"world\")\\n'"
+ "offending line: '\\tprint(\"world\")'"
).strip()
self.validate_cmd("-v", path, stdout=stdout, partial=True)
@@ -349,6 +349,6 @@ class TestCommandLine(TestCase):
"""Should display detailed error information if double verbose is on."""
with TemporaryPyFile(SOURCE_CODES["nannynag_errored"]) as path:
stdout = textwrap.dedent(
- "offending line: '\\tprint(\"world\")\\n'"
+ "offending line: '\\tprint(\"world\")'"
).strip()
self.validate_cmd("-vv", path, stdout=stdout, partial=True)
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 8e7ab3d..fd9c919 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -103,7 +103,7 @@ def k(x):
e.exception.msg,
'unindent does not match any outer indentation level')
self.assertEqual(e.exception.offset, 9)
- self.assertEqual(e.exception.text, ' x += 5\n')
+ self.assertEqual(e.exception.text, ' x += 5')
def test_int(self):
# Ordinary integers and binary operators
@@ -1157,7 +1157,7 @@ class Test_Tokenize(TestCase):
# skip the initial encoding token and the end tokens
tokens = list(_tokenize(readline(), encoding='utf-8'))[:-2]
- expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"\n')]
+ expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
self.assertEqual(tokens, expected_tokens,
"bytes not decoded with encoding")
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-05-24-09-59-56.gh-issue-104825.mQesie.rst b/Misc/NEWS.d/next/Core and Builtins/2023-05-24-09-59-56.gh-issue-104825.mQesie.rst
new file mode 100644
index 0000000..caf5d35
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-05-24-09-59-56.gh-issue-104825.mQesie.rst
@@ -0,0 +1,2 @@
+Tokens emitted by the :mod:`tokenize` module do not include an implicit
+``\n`` character in the ``line`` attribute anymore. Patch by Pablo Galindo
diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c
index f7e32d3..0023e30 100644
--- a/Python/Python-tokenize.c
+++ b/Python/Python-tokenize.c
@@ -123,6 +123,8 @@ _tokenizer_error(struct tok_state *tok)
int result = 0;
Py_ssize_t size = tok->inp - tok->buf;
+ assert(tok->buf[size-1] == '\n');
+ size -= 1; // Remove the newline character from the end of the line
error_line = PyUnicode_DecodeUTF8(tok->buf, size, "replace");
if (!error_line) {
result = -1;
@@ -193,6 +195,8 @@ tokenizeriter_next(tokenizeriterobject *it)
}
Py_ssize_t size = it->tok->inp - it->tok->buf;
+ assert(it->tok->buf[size-1] == '\n');
+ size -= 1; // Remove the newline character from the end of the line
PyObject *line = PyUnicode_DecodeUTF8(it->tok->buf, size, "replace");
if (line == NULL) {
Py_DECREF(str);