summary | refs | log | tree | commit | diff | stats
path: root/Parser/tokenizer.c
diff options
context:
space:
mode:
author: Pablo Galindo Salgado <Pablogsal@gmail.com>, 2021-07-10 00:29:29 (GMT)
committer: GitHub <noreply@github.com>, 2021-07-10 00:29:29 (GMT)
commit: f24777c2b329974b69d2a3bf5cfc37e0fcace36c (patch)
tree: d89b8067f343529ea0cdb5f187767e524693e8c6 /Parser/tokenizer.c
parent: 7b21108445969398f6d1db9234fc0fe727565d2e (diff)
download: cpython-f24777c2b329974b69d2a3bf5cfc37e0fcace36c.zip
download: cpython-f24777c2b329974b69d2a3bf5cfc37e0fcace36c.tar.gz
download: cpython-f24777c2b329974b69d2a3bf5cfc37e0fcace36c.tar.bz2
bpo-44317: Improve tokenizer errors with more informative locations (GH-26555)
Diffstat (limited to 'Parser/tokenizer.c')
-rw-r--r-- Parser/tokenizer.c | 72
1 file changed, 54 insertions, 18 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index ba93664..50a0afc 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1071,19 +1071,13 @@ tok_backup(struct tok_state *tok, int c)
}
}
-
static int
-syntaxerror(struct tok_state *tok, const char *format, ...)
+_syntaxerror_range(struct tok_state *tok, const char *format,
+ int col_offset, int end_col_offset,
+ va_list vargs)
{
PyObject *errmsg, *errtext, *args;
- va_list vargs;
-#ifdef HAVE_STDARG_PROTOTYPES
- va_start(vargs, format);
-#else
- va_start(vargs);
-#endif
errmsg = PyUnicode_FromFormatV(format, vargs);
- va_end(vargs);
if (!errmsg) {
goto error;
}
@@ -1093,7 +1087,14 @@ syntaxerror(struct tok_state *tok, const char *format, ...)
if (!errtext) {
goto error;
}
- int offset = (int)PyUnicode_GET_LENGTH(errtext);
+
+ if (col_offset == -1) {
+ col_offset = (int)PyUnicode_GET_LENGTH(errtext);
+ }
+ if (end_col_offset == -1) {
+ end_col_offset = col_offset;
+ }
+
Py_ssize_t line_len = strcspn(tok->line_start, "\n");
if (line_len != tok->cur - tok->line_start) {
Py_DECREF(errtext);
@@ -1104,8 +1105,8 @@ syntaxerror(struct tok_state *tok, const char *format, ...)
goto error;
}
- args = Py_BuildValue("(O(OiiN))", errmsg,
- tok->filename, tok->lineno, offset, errtext);
+ args = Py_BuildValue("(O(OiiNii))", errmsg, tok->filename, tok->lineno,
+ col_offset, errtext, tok->lineno, end_col_offset);
if (args) {
PyErr_SetObject(PyExc_SyntaxError, args);
Py_DECREF(args);
@@ -1118,6 +1119,38 @@ error:
}
static int
+syntaxerror(struct tok_state *tok, const char *format, ...)
+{
+ va_list vargs;
+#ifdef HAVE_STDARG_PROTOTYPES
+ va_start(vargs, format);
+#else
+ va_start(vargs);
+#endif
+ int ret = _syntaxerror_range(tok, format, -1, -1, vargs);
+ va_end(vargs);
+ return ret;
+}
+
+static int
+syntaxerror_known_range(struct tok_state *tok,
+ int col_offset, int end_col_offset,
+ const char *format, ...)
+{
+ va_list vargs;
+#ifdef HAVE_STDARG_PROTOTYPES
+ va_start(vargs, format);
+#else
+ va_start(vargs);
+#endif
+ int ret = _syntaxerror_range(tok, format, col_offset, end_col_offset, vargs);
+ va_end(vargs);
+ return ret;
+}
+
+
+
+static int
indenterror(struct tok_state *tok)
{
tok->done = E_TABSPACE;
@@ -1692,12 +1725,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
c = tok_nextc(tok);
}
if (c < '0' || c >= '8') {
- tok_backup(tok, c);
if (isdigit(c)) {
return syntaxerror(tok,
"invalid digit '%c' in octal literal", c);
}
else {
+ tok_backup(tok, c);
return syntaxerror(tok, "invalid octal literal");
}
}
@@ -1721,12 +1754,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
c = tok_nextc(tok);
}
if (c != '0' && c != '1') {
- tok_backup(tok, c);
if (isdigit(c)) {
return syntaxerror(tok,
"invalid digit '%c' in binary literal", c);
}
else {
+ tok_backup(tok, c);
return syntaxerror(tok, "invalid binary literal");
}
}
@@ -1759,6 +1792,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
}
c = tok_nextc(tok);
}
+ char* zeros_end = tok->cur;
if (isdigit(c)) {
nonzero = 1;
c = tok_decimal_tail(tok);
@@ -1779,10 +1813,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
else if (nonzero) {
/* Old-style octal: now disallowed. */
tok_backup(tok, c);
- return syntaxerror(tok,
- "leading zeros in decimal integer "
- "literals are not permitted; "
- "use an 0o prefix for octal integers");
+ return syntaxerror_known_range(
+ tok, (int)(tok->start + 1 - tok->line_start),
+ (int)(zeros_end - tok->line_start),
+ "leading zeros in decimal integer "
+ "literals are not permitted; "
+ "use an 0o prefix for octal integers");
}
if (!verify_end_of_number(tok, c, "decimal")) {
return ERRORTOKEN;