summary | refs | log | tree | commit | diff | stats
path: root/Parser
diff options
context:
space:
mode:
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> 2021-07-10 00:47:33 (GMT)
committer: GitHub <noreply@github.com> 2021-07-10 00:47:33 (GMT)
commit: 2a722d4fab6a9656f3c03cfdaf6d1684277b8af5 (patch)
tree: 599524ffffe9b7de50fac6564eb892204fcf7b6a /Parser
parent: 2f7636887e9f978352aa47b18d5f376263663ba1 (diff)
download: cpython-2a722d4fab6a9656f3c03cfdaf6d1684277b8af5.zip
cpython-2a722d4fab6a9656f3c03cfdaf6d1684277b8af5.tar.gz
cpython-2a722d4fab6a9656f3c03cfdaf6d1684277b8af5.tar.bz2
bpo-44317: Improve tokenizer errors with more informative locations (GH-26555) (GH-27079)
(cherry picked from commit f24777c2b329974b69d2a3bf5cfc37e0fcace36c) Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
Diffstat (limited to 'Parser')
-rw-r--r-- Parser/tokenizer.c 72
1 files changed, 54 insertions, 18 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index be9b13e..3dea77e 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1071,19 +1071,13 @@ tok_backup(struct tok_state *tok, int c)
}
}
-
static int
-syntaxerror(struct tok_state *tok, const char *format, ...)
+_syntaxerror_range(struct tok_state *tok, const char *format,
+ int col_offset, int end_col_offset,
+ va_list vargs)
{
PyObject *errmsg, *errtext, *args;
- va_list vargs;
-#ifdef HAVE_STDARG_PROTOTYPES
- va_start(vargs, format);
-#else
- va_start(vargs);
-#endif
errmsg = PyUnicode_FromFormatV(format, vargs);
- va_end(vargs);
if (!errmsg) {
goto error;
}
@@ -1093,7 +1087,14 @@ syntaxerror(struct tok_state *tok, const char *format, ...)
if (!errtext) {
goto error;
}
- int offset = (int)PyUnicode_GET_LENGTH(errtext);
+
+ if (col_offset == -1) {
+ col_offset = (int)PyUnicode_GET_LENGTH(errtext);
+ }
+ if (end_col_offset == -1) {
+ end_col_offset = col_offset;
+ }
+
Py_ssize_t line_len = strcspn(tok->line_start, "\n");
if (line_len != tok->cur - tok->line_start) {
Py_DECREF(errtext);
@@ -1104,8 +1105,8 @@ syntaxerror(struct tok_state *tok, const char *format, ...)
goto error;
}
- args = Py_BuildValue("(O(OiiN))", errmsg,
- tok->filename, tok->lineno, offset, errtext);
+ args = Py_BuildValue("(O(OiiNii))", errmsg, tok->filename, tok->lineno,
+ col_offset, errtext, tok->lineno, end_col_offset);
if (args) {
PyErr_SetObject(PyExc_SyntaxError, args);
Py_DECREF(args);
@@ -1118,6 +1119,38 @@ error:
}
static int
+syntaxerror(struct tok_state *tok, const char *format, ...)
+{
+ va_list vargs;
+#ifdef HAVE_STDARG_PROTOTYPES
+ va_start(vargs, format);
+#else
+ va_start(vargs);
+#endif
+ int ret = _syntaxerror_range(tok, format, -1, -1, vargs);
+ va_end(vargs);
+ return ret;
+}
+
+static int
+syntaxerror_known_range(struct tok_state *tok,
+ int col_offset, int end_col_offset,
+ const char *format, ...)
+{
+ va_list vargs;
+#ifdef HAVE_STDARG_PROTOTYPES
+ va_start(vargs, format);
+#else
+ va_start(vargs);
+#endif
+ int ret = _syntaxerror_range(tok, format, col_offset, end_col_offset, vargs);
+ va_end(vargs);
+ return ret;
+}
+
+
+
+static int
indenterror(struct tok_state *tok)
{
tok->done = E_TABSPACE;
@@ -1692,12 +1725,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
c = tok_nextc(tok);
}
if (c < '0' || c >= '8') {
- tok_backup(tok, c);
if (isdigit(c)) {
return syntaxerror(tok,
"invalid digit '%c' in octal literal", c);
}
else {
+ tok_backup(tok, c);
return syntaxerror(tok, "invalid octal literal");
}
}
@@ -1721,12 +1754,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
c = tok_nextc(tok);
}
if (c != '0' && c != '1') {
- tok_backup(tok, c);
if (isdigit(c)) {
return syntaxerror(tok,
"invalid digit '%c' in binary literal", c);
}
else {
+ tok_backup(tok, c);
return syntaxerror(tok, "invalid binary literal");
}
}
@@ -1759,6 +1792,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
}
c = tok_nextc(tok);
}
+ char* zeros_end = tok->cur;
if (isdigit(c)) {
nonzero = 1;
c = tok_decimal_tail(tok);
@@ -1779,10 +1813,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
else if (nonzero) {
/* Old-style octal: now disallowed. */
tok_backup(tok, c);
- return syntaxerror(tok,
- "leading zeros in decimal integer "
- "literals are not permitted; "
- "use an 0o prefix for octal integers");
+ return syntaxerror_known_range(
+ tok, (int)(tok->start + 1 - tok->line_start),
+ (int)(zeros_end - tok->line_start),
+ "leading zeros in decimal integer "
+ "literals are not permitted; "
+ "use an 0o prefix for octal integers");
}
if (!verify_end_of_number(tok, c, "decimal")) {
return ERRORTOKEN;