summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Pablo Galindo Salgado <Pablogsal@gmail.com>, 2023-06-06 11:52:16 (GMT)
committer: GitHub <noreply@github.com>, 2023-06-06 11:52:16 (GMT)
commit: c0a6ed39343b6dc355607fbff108c515e6c103bf (patch)
tree: 39b12fa4a80178944cba90020076b336f4c9f1f4
parent: 0202aa002e06acef9aa55ace0d939103df19cadd (diff)
download: cpython-c0a6ed39343b6dc355607fbff108c515e6c103bf.zip
cpython-c0a6ed39343b6dc355607fbff108c515e6c103bf.tar.gz
cpython-c0a6ed39343b6dc355607fbff108c515e6c103bf.tar.bz2
gh-105259: Ensure we don't show newline characters for trailing NEWLINE tokens (#105364)
-rw-r--r--  Lib/test/test_tokenize.py  2
-rw-r--r--  Misc/NEWS.d/next/Core and Builtins/2023-06-06-11-37-53.gh-issue-105259.E2BGKL.rst  2
-rw-r--r--  Parser/tokenizer.c  7
-rw-r--r--  Parser/tokenizer.h  1
-rw-r--r--  Python/Python-tokenize.c  10
5 files changed, 17 insertions, 5 deletions
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index a9a2b76..5ac1709 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1870,7 +1870,7 @@ class CTokenizeTest(TestCase):
TokenInfo(type=NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1\n'),
TokenInfo(type=OP, string='+', start=(1, 1), end=(1, 2), line='1+1\n'),
TokenInfo(type=NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1\n'),
- TokenInfo(type=NEWLINE, string='\n', start=(1, 3), end=(1, 4), line='1+1\n'),
+ TokenInfo(type=NEWLINE, string='', start=(1, 3), end=(1, 4), line='1+1\n'),
TokenInfo(type=ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
]
for encoding in ["utf-8", "latin-1", "utf-16"]:
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-06-06-11-37-53.gh-issue-105259.E2BGKL.rst b/Misc/NEWS.d/next/Core and Builtins/2023-06-06-11-37-53.gh-issue-105259.E2BGKL.rst
new file mode 100644
index 0000000..75a6303
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-06-06-11-37-53.gh-issue-105259.E2BGKL.rst
@@ -0,0 +1,2 @@
+Don't include newline character for trailing ``NEWLINE`` tokens emitted in
+the :mod:`tokenize` module. Patch by Pablo Galindo
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index db12828..b349f59 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -114,6 +114,7 @@ tok_new(void)
tok->report_warnings = 1;
tok->tok_extra_tokens = 0;
tok->comment_newline = 0;
+ tok->implicit_newline = 0;
tok->tok_mode_stack[0] = (tokenizer_mode){.kind =TOK_REGULAR_MODE, .f_string_quote='\0', .f_string_quote_size = 0, .f_string_debug=0};
tok->tok_mode_stack_index = 0;
tok->tok_report_warnings = 1;
@@ -355,10 +356,12 @@ tok_concatenate_interactive_new_line(struct tok_state *tok, const char *line) {
return -1;
}
strcpy(new_str + current_size, line);
+ tok->implicit_newline = 0;
if (last_char != '\n') {
/* Last line does not end in \n, fake one */
new_str[current_size + line_size - 1] = '\n';
new_str[current_size + line_size] = '\0';
+ tok->implicit_newline = 1;
}
tok->interactive_src_start = new_str;
tok->interactive_src_end = new_str + current_size + line_size;
@@ -1262,11 +1265,13 @@ tok_underflow_file(struct tok_state *tok) {
tok->done = E_EOF;
return 0;
}
+ tok->implicit_newline = 0;
if (tok->inp[-1] != '\n') {
assert(tok->inp + 1 < tok->end);
/* Last line does not end in \n, fake one */
*tok->inp++ = '\n';
*tok->inp = '\0';
+ tok->implicit_newline = 1;
}
ADVANCE_LINENO();
@@ -1304,11 +1309,13 @@ tok_underflow_readline(struct tok_state* tok) {
tok->done = E_EOF;
return 0;
}
+ tok->implicit_newline = 0;
if (tok->inp[-1] != '\n') {
assert(tok->inp + 1 < tok->end);
/* Last line does not end in \n, fake one */
*tok->inp++ = '\n';
*tok->inp = '\0';
+ tok->implicit_newline = 1;
}
ADVANCE_LINENO();
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 600d429..16e919a 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -131,6 +131,7 @@ struct tok_state {
int tok_report_warnings;
int tok_extra_tokens;
int comment_newline;
+ int implicit_newline;
#ifdef Py_DEBUG
int debug;
#endif
diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c
index a7933b2..223de54 100644
--- a/Python/Python-tokenize.c
+++ b/Python/Python-tokenize.c
@@ -243,10 +243,12 @@ tokenizeriter_next(tokenizeriterobject *it)
}
else if (type == NEWLINE) {
Py_DECREF(str);
- if (it->tok->start[0] == '\r') {
- str = PyUnicode_FromString("\r\n");
- } else {
- str = PyUnicode_FromString("\n");
+ if (!it->tok->implicit_newline) {
+ if (it->tok->start[0] == '\r') {
+ str = PyUnicode_FromString("\r\n");
+ } else {
+ str = PyUnicode_FromString("\n");
+ }
}
end_col_offset++;
}