diff options
author | Pablo Galindo Salgado <Pablogsal@gmail.com> | 2021-07-10 00:29:29 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-07-10 00:29:29 (GMT) |
commit | f24777c2b329974b69d2a3bf5cfc37e0fcace36c (patch) | |
tree | d89b8067f343529ea0cdb5f187767e524693e8c6 /Parser/tokenizer.c | |
parent | 7b21108445969398f6d1db9234fc0fe727565d2e (diff) | |
download | cpython-f24777c2b329974b69d2a3bf5cfc37e0fcace36c.zip cpython-f24777c2b329974b69d2a3bf5cfc37e0fcace36c.tar.gz cpython-f24777c2b329974b69d2a3bf5cfc37e0fcace36c.tar.bz2 |
bpo-44317: Improve tokenizer errors with more informative locations (GH-26555)
Diffstat (limited to 'Parser/tokenizer.c')
-rw-r--r-- | Parser/tokenizer.c | 72 |
1 files changed, 54 insertions, 18 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index ba93664..50a0afc 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1071,19 +1071,13 @@ tok_backup(struct tok_state *tok, int c) } } - static int -syntaxerror(struct tok_state *tok, const char *format, ...) +_syntaxerror_range(struct tok_state *tok, const char *format, + int col_offset, int end_col_offset, + va_list vargs) { PyObject *errmsg, *errtext, *args; - va_list vargs; -#ifdef HAVE_STDARG_PROTOTYPES - va_start(vargs, format); -#else - va_start(vargs); -#endif errmsg = PyUnicode_FromFormatV(format, vargs); - va_end(vargs); if (!errmsg) { goto error; } @@ -1093,7 +1087,14 @@ syntaxerror(struct tok_state *tok, const char *format, ...) if (!errtext) { goto error; } - int offset = (int)PyUnicode_GET_LENGTH(errtext); + + if (col_offset == -1) { + col_offset = (int)PyUnicode_GET_LENGTH(errtext); + } + if (end_col_offset == -1) { + end_col_offset = col_offset; + } + Py_ssize_t line_len = strcspn(tok->line_start, "\n"); if (line_len != tok->cur - tok->line_start) { Py_DECREF(errtext); @@ -1104,8 +1105,8 @@ syntaxerror(struct tok_state *tok, const char *format, ...) goto error; } - args = Py_BuildValue("(O(OiiN))", errmsg, - tok->filename, tok->lineno, offset, errtext); + args = Py_BuildValue("(O(OiiNii))", errmsg, tok->filename, tok->lineno, + col_offset, errtext, tok->lineno, end_col_offset); if (args) { PyErr_SetObject(PyExc_SyntaxError, args); Py_DECREF(args); @@ -1118,6 +1119,38 @@ error: } static int +syntaxerror(struct tok_state *tok, const char *format, ...) +{ + va_list vargs; +#ifdef HAVE_STDARG_PROTOTYPES + va_start(vargs, format); +#else + va_start(vargs); +#endif + int ret = _syntaxerror_range(tok, format, -1, -1, vargs); + va_end(vargs); + return ret; +} + +static int +syntaxerror_known_range(struct tok_state *tok, + int col_offset, int end_col_offset, + const char *format, ...) +{ + va_list vargs; +#ifdef HAVE_STDARG_PROTOTYPES + va_start(vargs, format); +#else + va_start(vargs); +#endif + int ret = _syntaxerror_range(tok, format, col_offset, end_col_offset, vargs); + va_end(vargs); + return ret; +} + + + +static int indenterror(struct tok_state *tok) { tok->done = E_TABSPACE; @@ -1692,12 +1725,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) c = tok_nextc(tok); } if (c < '0' || c >= '8') { - tok_backup(tok, c); if (isdigit(c)) { return syntaxerror(tok, "invalid digit '%c' in octal literal", c); } else { + tok_backup(tok, c); return syntaxerror(tok, "invalid octal literal"); } } @@ -1721,12 +1754,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) c = tok_nextc(tok); } if (c != '0' && c != '1') { - tok_backup(tok, c); if (isdigit(c)) { return syntaxerror(tok, "invalid digit '%c' in binary literal", c); } else { + tok_backup(tok, c); return syntaxerror(tok, "invalid binary literal"); } } @@ -1759,6 +1792,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) } c = tok_nextc(tok); } + char* zeros_end = tok->cur; if (isdigit(c)) { nonzero = 1; c = tok_decimal_tail(tok); @@ -1779,10 +1813,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) else if (nonzero) { /* Old-style octal: now disallowed. */ tok_backup(tok, c); - return syntaxerror(tok, - "leading zeros in decimal integer " - "literals are not permitted; " - "use an 0o prefix for octal integers"); + return syntaxerror_known_range( + tok, (int)(tok->start + 1 - tok->line_start), + (int)(zeros_end - tok->line_start), + "leading zeros in decimal integer " + "literals are not permitted; " + "use an 0o prefix for octal integers"); } if (!verify_end_of_number(tok, c, "decimal")) { return ERRORTOKEN; |