summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Pablo Galindo <Pablogsal@gmail.com>, 2020-04-21 00:53:04 (GMT)
committer: GitHub <noreply@github.com>, 2020-04-21 00:53:04 (GMT)
commit: 11a7f158ef51b0edcde3c3d9215172354e385877 (patch)
tree: 3bb7e125dce1a522ccbbe1ffbd14204c71bc852e
parent: 6a9e80a93148b13e4d3bceaab5ea1804ab0e64d5 (diff)
download: cpython-11a7f158ef51b0edcde3c3d9215172354e385877.zip
cpython-11a7f158ef51b0edcde3c3d9215172354e385877.tar.gz
cpython-11a7f158ef51b0edcde3c3d9215172354e385877.tar.bz2
bpo-40335: Correctly handle multi-line strings in tokenize error scenarios (GH-19619)
Co-authored-by: Guido van Rossum <gvanrossum@gmail.com>
-rw-r--r-- Lib/test/test_exceptions.py 12
-rw-r--r-- Parser/parsetok.c 41
-rw-r--r-- Parser/tokenizer.c 7
3 files changed, 37 insertions, 23 deletions
diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index d6739f1..8c4a288 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -188,7 +188,7 @@ class ExceptionTests(unittest.TestCase):
if not isinstance(src, str):
src = src.decode(encoding, 'replace')
line = src.split('\n')[lineno-1]
- self.assertEqual(cm.exception.text.rstrip('\n'), line)
+ self.assertIn(line, cm.exception.text)
check('def fact(x):\n\treturn x!\n', 2, 10)
check('1 +\n', 1, 4)
@@ -217,6 +217,16 @@ class ExceptionTests(unittest.TestCase):
check(b'\xce\xb1 = 0xI', 1, 6)
check(b'# -*- coding: iso8859-7 -*-\n\xe1 = 0xI', 2, 6,
encoding='iso8859-7')
+ check(b"""if 1:
+ def foo():
+ '''
+
+ def bar():
+ pass
+
+ def baz():
+ '''quux'''
+ """, 9, 20)
# Errors thrown by symtable.c
check('x = [(yield i) for i in range(3)]', 1, 5)
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index 37ca65c..1ecb2c4 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -251,25 +251,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
const char *line_start;
type = PyTokenizer_Get(tok, &a, &b);
- if (type == ERRORTOKEN) {
- err_ret->error = tok->done;
- break;
- }
- if (type == ENDMARKER && started) {
- type = NEWLINE; /* Add an extra newline */
- started = 0;
- /* Add the right number of dedent tokens,
- except if a certain flag is given --
- codeop.py uses this. */
- if (tok->indent &&
- !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
- {
- tok->pendin = -tok->indent;
- tok->indent = 0;
- }
- }
- else
- started = 1;
+
len = (a != NULL && b != NULL) ? b - a : 0;
str = (char *) PyObject_MALLOC(len + 1);
if (str == NULL) {
@@ -328,6 +310,27 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
continue;
}
+ if (type == ERRORTOKEN) {
+ err_ret->error = tok->done;
+ break;
+ }
+ if (type == ENDMARKER && started) {
+ type = NEWLINE; /* Add an extra newline */
+ started = 0;
+ /* Add the right number of dedent tokens,
+ except if a certain flag is given --
+ codeop.py uses this. */
+ if (tok->indent &&
+ !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
+ {
+ tok->pendin = -tok->indent;
+ tok->indent = 0;
+ }
+ }
+ else {
+ started = 1;
+ }
+
if ((err_ret->error =
PyParser_AddToken(ps, (int)type, str,
lineno, col_offset, tok->lineno, end_col_offset,
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 97986aa..95dfc53 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1392,13 +1392,14 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
if (nonascii && !verify_identifier(tok)) {
return ERRORTOKEN;
}
+
+ *p_start = tok->start;
+ *p_end = tok->cur;
+
if (c == '"' || c == '\'') {
tok->done = E_BADPREFIX;
return ERRORTOKEN;
}
- *p_start = tok->start;
- *p_end = tok->cur;
-
/* async/await parsing block. */
if (tok->cur - tok->start == 5 && tok->start[0] == 'a') {
/* May be an 'async' or 'await' token. For Python 3.7 or