summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Lib/test/test_cmd_line_script.py 2
-rw-r--r-- Lib/test/test_string_literals.py 7
-rw-r--r-- Parser/pegen/parse_string.c 38
-rw-r--r-- Parser/pegen/parse_string.h 4
-rw-r--r-- Parser/pegen/pegen.c 10
-rw-r--r-- Parser/pegen/pegen.h 1
6 files changed, 34 insertions, 28 deletions
diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py
index 1fc9500..1713405 100644
--- a/Lib/test/test_cmd_line_script.py
+++ b/Lib/test/test_cmd_line_script.py
@@ -648,7 +648,7 @@ class CmdLineTest(unittest.TestCase):
self.assertEqual(
stderr.splitlines()[-3:],
[ b' foo = """\\q"""',
- b' ^',
+ b' ^',
b'SyntaxError: invalid escape sequence \\q'
],
)
diff --git a/Lib/test/test_string_literals.py b/Lib/test/test_string_literals.py
index 5b5477d..9565ee2 100644
--- a/Lib/test/test_string_literals.py
+++ b/Lib/test/test_string_literals.py
@@ -118,8 +118,7 @@ class TestLiterals(unittest.TestCase):
eval("'''\n\\z'''")
self.assertEqual(len(w), 1)
self.assertEqual(w[0].filename, '<string>')
- if use_old_parser():
- self.assertEqual(w[0].lineno, 1)
+ self.assertEqual(w[0].lineno, 1)
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('error', category=DeprecationWarning)
@@ -128,8 +127,8 @@ class TestLiterals(unittest.TestCase):
exc = cm.exception
self.assertEqual(w, [])
self.assertEqual(exc.filename, '<string>')
- if use_old_parser():
- self.assertEqual(exc.lineno, 1)
+ self.assertEqual(exc.lineno, 1)
+ self.assertEqual(exc.offset, 1)
def test_eval_str_raw(self):
self.assertEqual(eval(""" r'x' """), 'x')
diff --git a/Parser/pegen/parse_string.c b/Parser/pegen/parse_string.c
index d96303d..ca4b733 100644
--- a/Parser/pegen/parse_string.c
+++ b/Parser/pegen/parse_string.c
@@ -12,7 +12,7 @@
// file (like "_PyPegen_raise_syntax_error").
static int
-warn_invalid_escape_sequence(Parser *p, unsigned char first_invalid_escape_char)
+warn_invalid_escape_sequence(Parser *p, unsigned char first_invalid_escape_char, Token *t)
{
PyObject *msg =
PyUnicode_FromFormat("invalid escape sequence \\%c", first_invalid_escape_char);
@@ -20,11 +20,16 @@ warn_invalid_escape_sequence(Parser *p, unsigned char first_invalid_escape_char)
return -1;
}
if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg, p->tok->filename,
- p->tok->lineno, NULL, NULL) < 0) {
+ t->lineno, NULL, NULL) < 0) {
if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
/* Replace the DeprecationWarning exception with a SyntaxError
to get a more accurate error report */
PyErr_Clear();
+
+ /* This is needed, in order for the SyntaxError to point to the token t,
+ since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
+ error location, if p->known_err_token is not set. */
+ p->known_err_token = t;
RAISE_SYNTAX_ERROR("invalid escape sequence \\%c", first_invalid_escape_char);
}
Py_DECREF(msg);
@@ -47,7 +52,7 @@ decode_utf8(const char **sPtr, const char *end)
}
static PyObject *
-decode_unicode_with_escapes(Parser *parser, const char *s, size_t len)
+decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
{
PyObject *v, *u;
char *buf;
@@ -110,7 +115,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len)
v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape);
if (v != NULL && first_invalid_escape != NULL) {
- if (warn_invalid_escape_sequence(parser, *first_invalid_escape) < 0) {
+ if (warn_invalid_escape_sequence(parser, *first_invalid_escape, t) < 0) {
/* We have not decref u before because first_invalid_escape points
inside u. */
Py_XDECREF(u);
@@ -123,7 +128,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len)
}
static PyObject *
-decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len)
+decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
{
const char *first_invalid_escape;
PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape);
@@ -132,7 +137,7 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len)
}
if (first_invalid_escape != NULL) {
- if (warn_invalid_escape_sequence(p, *first_invalid_escape) < 0) {
+ if (warn_invalid_escape_sequence(p, *first_invalid_escape, t) < 0) {
Py_DECREF(result);
return NULL;
}
@@ -146,9 +151,14 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len)
If the string is an f-string, set *fstr and *fstrlen to the unparsed
string object. Return 0 if no errors occurred. */
int
-_PyPegen_parsestr(Parser *p, const char *s, int *bytesmode, int *rawmode, PyObject **result,
- const char **fstr, Py_ssize_t *fstrlen)
+_PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result,
+ const char **fstr, Py_ssize_t *fstrlen, Token *t)
{
+ const char *s = PyBytes_AsString(t->bytes);
+ if (s == NULL) {
+ return -1;
+ }
+
size_t len;
int quote = Py_CHARMASK(*s);
int fmode = 0;
@@ -245,7 +255,7 @@ _PyPegen_parsestr(Parser *p, const char *s, int *bytesmode, int *rawmode, PyObje
*result = PyBytes_FromStringAndSize(s, len);
}
else {
- *result = decode_bytes_with_escapes(p, s, len);
+ *result = decode_bytes_with_escapes(p, s, len, t);
}
}
else {
@@ -253,7 +263,7 @@ _PyPegen_parsestr(Parser *p, const char *s, int *bytesmode, int *rawmode, PyObje
*result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
}
else {
- *result = decode_unicode_with_escapes(p, s, len);
+ *result = decode_unicode_with_escapes(p, s, len, t);
}
}
return *result == NULL ? -1 : 0;
@@ -637,7 +647,7 @@ exit:
*/
static int
fstring_find_literal(Parser *p, const char **str, const char *end, int raw,
- PyObject **literal, int recurse_lvl)
+ PyObject **literal, int recurse_lvl, Token *t)
{
/* Get any literal string. It ends when we hit an un-doubled left
brace (which isn't part of a unicode name escape such as
@@ -660,7 +670,7 @@ fstring_find_literal(Parser *p, const char **str, const char *end, int raw,
}
break;
}
- if (ch == '{' && warn_invalid_escape_sequence(p, ch) < 0) {
+ if (ch == '{' && warn_invalid_escape_sequence(p, ch, t) < 0) {
return -1;
}
}
@@ -704,7 +714,7 @@ done:
NULL, NULL);
else
*literal = decode_unicode_with_escapes(p, literal_start,
- s - literal_start);
+ s - literal_start, t);
if (!*literal)
return -1;
}
@@ -1041,7 +1051,7 @@ fstring_find_literal_and_expr(Parser *p, const char **str, const char *end, int
assert(*literal == NULL && *expression == NULL);
/* Get any literal string. */
- result = fstring_find_literal(p, str, end, raw, literal, recurse_lvl);
+ result = fstring_find_literal(p, str, end, raw, literal, recurse_lvl, t);
if (result < 0)
goto error;
diff --git a/Parser/pegen/parse_string.h b/Parser/pegen/parse_string.h
index 4f2aa94..cd85bd5 100644
--- a/Parser/pegen/parse_string.h
+++ b/Parser/pegen/parse_string.h
@@ -34,8 +34,8 @@ typedef struct {
} FstringParser;
void _PyPegen_FstringParser_Init(FstringParser *);
-int _PyPegen_parsestr(Parser *, const char *, int *, int *, PyObject **,
- const char **, Py_ssize_t *);
+int _PyPegen_parsestr(Parser *, int *, int *, PyObject **,
+ const char **, Py_ssize_t *, Token *);
int _PyPegen_FstringParser_ConcatFstring(Parser *, FstringParser *, const char **,
const char *, int, int, Token *, Token *,
Token *);
diff --git a/Parser/pegen/pegen.c b/Parser/pegen/pegen.c
index c311593..06af53b 100644
--- a/Parser/pegen/pegen.c
+++ b/Parser/pegen/pegen.c
@@ -383,7 +383,7 @@ _PyPegen_raise_error(Parser *p, PyObject *errtype, int with_col_number, const ch
PyObject *errstr = NULL;
PyObject *loc = NULL;
PyObject *tmp = NULL;
- Token *t = p->tokens[p->fill - 1];
+ Token *t = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
Py_ssize_t col_number = !with_col_number;
va_list va;
p->error_indicator = 1;
@@ -1053,6 +1053,7 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
p->starting_col_offset = 0;
p->flags = flags;
p->feature_version = feature_version;
+ p->known_err_token = NULL;
return p;
}
@@ -1972,12 +1973,7 @@ _PyPegen_concatenate_strings(Parser *p, asdl_seq *strings)
const char *fstr;
Py_ssize_t fstrlen = -1;
- char *this_str = PyBytes_AsString(t->bytes);
- if (!this_str) {
- goto error;
- }
-
- if (_PyPegen_parsestr(p, this_str, &this_bytesmode, &this_rawmode, &s, &fstr, &fstrlen) != 0) {
+ if (_PyPegen_parsestr(p, &this_bytesmode, &this_rawmode, &s, &fstr, &fstrlen, t) != 0) {
goto error;
}
diff --git a/Parser/pegen/pegen.h b/Parser/pegen/pegen.h
index cbe6f19..ffb18e4 100644
--- a/Parser/pegen/pegen.h
+++ b/Parser/pegen/pegen.h
@@ -71,6 +71,7 @@ typedef struct {
int flags;
int feature_version;
growable_comment_array type_ignore_comments;
+ Token *known_err_token;
} Parser;
typedef struct {