diff options
author | Pablo Galindo <Pablogsal@gmail.com> | 2020-04-21 00:53:04 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-04-21 00:53:04 (GMT) |
commit | 11a7f158ef51b0edcde3c3d9215172354e385877 (patch) | |
tree | 3bb7e125dce1a522ccbbe1ffbd14204c71bc852e | |
parent | 6a9e80a93148b13e4d3bceaab5ea1804ab0e64d5 (diff) | |
download | cpython-11a7f158ef51b0edcde3c3d9215172354e385877.zip cpython-11a7f158ef51b0edcde3c3d9215172354e385877.tar.gz cpython-11a7f158ef51b0edcde3c3d9215172354e385877.tar.bz2 |
bpo-40335: Correctly handle multi-line strings in tokenize error scenarios (GH-19619)
Co-authored-by: Guido van Rossum <gvanrossum@gmail.com>
-rw-r--r-- | Lib/test/test_exceptions.py | 12 | ||||
-rw-r--r-- | Parser/parsetok.c | 41 | ||||
-rw-r--r-- | Parser/tokenizer.c | 7 |
3 files changed, 37 insertions, 23 deletions
```diff
diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index d6739f1..8c4a288 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -188,7 +188,7 @@ class ExceptionTests(unittest.TestCase):
                 if not isinstance(src, str):
                     src = src.decode(encoding, 'replace')
                 line = src.split('\n')[lineno-1]
-                self.assertEqual(cm.exception.text.rstrip('\n'), line)
+                self.assertIn(line, cm.exception.text)
 
         check('def fact(x):\n\treturn x!\n', 2, 10)
         check('1 +\n', 1, 4)
@@ -217,6 +217,16 @@ class ExceptionTests(unittest.TestCase):
         check(b'\xce\xb1 = 0xI', 1, 6)
         check(b'# -*- coding: iso8859-7 -*-\n\xe1 = 0xI', 2, 6,
               encoding='iso8859-7')
+        check(b"""if 1:
+            def foo():
+                '''
+
+            def bar():
+                pass
+
+            def baz():
+                '''quux'''
+            """, 9, 20)
 
         # Errors thrown by symtable.c
         check('x = [(yield i) for i in range(3)]', 1, 5)
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index 37ca65c..1ecb2c4 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -251,25 +251,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
         const char *line_start;
 
         type = PyTokenizer_Get(tok, &a, &b);
-        if (type == ERRORTOKEN) {
-            err_ret->error = tok->done;
-            break;
-        }
-        if (type == ENDMARKER && started) {
-            type = NEWLINE; /* Add an extra newline */
-            started = 0;
-            /* Add the right number of dedent tokens,
-               except if a certain flag is given --
-               codeop.py uses this. */
-            if (tok->indent &&
-                !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
-            {
-                tok->pendin = -tok->indent;
-                tok->indent = 0;
-            }
-        }
-        else
-            started = 1;
+
         len = (a != NULL && b != NULL) ? b - a : 0;
         str = (char *) PyObject_MALLOC(len + 1);
         if (str == NULL) {
@@ -328,6 +310,27 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
             continue;
         }
 
+        if (type == ERRORTOKEN) {
+            err_ret->error = tok->done;
+            break;
+        }
+        if (type == ENDMARKER && started) {
+            type = NEWLINE; /* Add an extra newline */
+            started = 0;
+            /* Add the right number of dedent tokens,
+               except if a certain flag is given --
+               codeop.py uses this. */
+            if (tok->indent &&
+                !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
+            {
+                tok->pendin = -tok->indent;
+                tok->indent = 0;
+            }
+        }
+        else {
+            started = 1;
+        }
+
         if ((err_ret->error =
              PyParser_AddToken(ps, (int)type, str,
                                lineno, col_offset, tok->lineno, end_col_offset,
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 97986aa..95dfc53 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1392,13 +1392,14 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
         if (nonascii && !verify_identifier(tok)) {
             return ERRORTOKEN;
         }
+
+        *p_start = tok->start;
+        *p_end = tok->cur;
+
         if (c == '"' || c == '\'') {
             tok->done = E_BADPREFIX;
             return ERRORTOKEN;
         }
-        *p_start = tok->start;
-        *p_end = tok->cur;
-
         /* async/await parsing block. */
         if (tok->cur - tok->start == 5 && tok->start[0] == 'a') {
             /* May be an 'async' or 'await' token.  For Python 3.7 or
```