summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Batuhan Taskaya <isidentical@gmail.com> 2021-01-20 21:38:47 (GMT)
committer: GitHub <noreply@github.com> 2021-01-20 21:38:47 (GMT)
commit: a698d52c3975c80b45b139b2f08402ec514dce75 (patch)
tree: 25a4577b9617d80cb43ffcfe27a54435f42c6b0d
parent: c3f167d7b243f8b8e1b797586e6cef35add013bc (diff)
download: cpython-a698d52c3975c80b45b139b2f08402ec514dce75.zip
cpython-a698d52c3975c80b45b139b2f08402ec514dce75.tar.gz
cpython-a698d52c3975c80b45b139b2f08402ec514dce75.tar.bz2
bpo-40176: Improve error messages for unclosed string literals (GH-19346)
Automerge-Triggered-By: GH:isidentical
-rw-r--r-- Include/errcode.h | 2
-rw-r--r-- Lib/test/test_eof.py | 24
-rw-r--r-- Lib/test/test_exceptions.py | 4
-rw-r--r-- Lib/test/test_fstring.py | 2
-rw-r--r-- Misc/NEWS.d/next/Core and Builtins/2021-01-20-22-31-01.bpo-40176.anjyWw.rst | 2
-rw-r--r-- Parser/pegen.c | 6
-rw-r--r-- Parser/tokenizer.c | 26
7 files changed, 34 insertions, 32 deletions
diff --git a/Include/errcode.h b/Include/errcode.h
index 790518b..f2671d6 100644
--- a/Include/errcode.h
+++ b/Include/errcode.h
@@ -26,8 +26,6 @@ extern "C" {
#define E_TOODEEP 20 /* Too many indentation levels */
#define E_DEDENT 21 /* No matching outer block for dedent */
#define E_DECODE 22 /* Error in decoding into Unicode */
-#define E_EOFS 23 /* EOF in triple-quoted string */
-#define E_EOLS 24 /* EOL in single-quoted string */
#define E_LINECONT 25 /* Unexpected characters after a line continuation */
#define E_BADSINGLE 27 /* Ill-formed single statement input */
diff --git a/Lib/test/test_eof.py b/Lib/test/test_eof.py
index 2cf263d..b370e27 100644
--- a/Lib/test/test_eof.py
+++ b/Lib/test/test_eof.py
@@ -7,23 +7,25 @@ from test.support import script_helper
import unittest
class EOFTestCase(unittest.TestCase):
- def test_EOFC(self):
- expect = "EOL while scanning string literal (<string>, line 1)"
- try:
- eval("""'this is a test\
- """)
- except SyntaxError as msg:
- self.assertEqual(str(msg), expect)
- else:
- raise support.TestFailed
+ def test_EOF_single_quote(self):
+ expect = "unterminated string literal (detected at line 1) (<string>, line 1)"
+ for quote in ("'", "\""):
+ try:
+ eval(f"""{quote}this is a test\
+ """)
+ except SyntaxError as msg:
+ self.assertEqual(str(msg), expect)
+ self.assertEqual(msg.offset, 1)
+ else:
+ raise support.TestFailed
def test_EOFS(self):
- expect = ("EOF while scanning triple-quoted string literal "
- "(<string>, line 1)")
+ expect = ("unterminated triple-quoted string literal (detected at line 1) (<string>, line 1)")
try:
eval("""'''this is a test""")
except SyntaxError as msg:
self.assertEqual(str(msg), expect)
+ self.assertEqual(msg.offset, 1)
else:
raise support.TestFailed
diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index eb70d7b..21878c3 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -206,7 +206,7 @@ class ExceptionTests(unittest.TestCase):
check(b'# -*- coding: cp1251 -*-\nPython = "\xcf\xb3\xf2\xee\xed" +',
2, 19, encoding='cp1251')
check(b'Python = "\xcf\xb3\xf2\xee\xed" +', 1, 18)
- check('x = "a', 1, 7)
+ check('x = "a', 1, 5)
check('lambda x: x = 2', 1, 1)
check('f{a + b + c}', 1, 2)
check('[file for str(file) in []\n])', 1, 11)
@@ -238,7 +238,7 @@ class ExceptionTests(unittest.TestCase):
def baz():
'''quux'''
- """, 9, 20)
+ """, 9, 24)
check("pass\npass\npass\n(1+)\npass\npass\npass", 4, 4)
check("(1+)", 1, 4)
diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
index 2345832..7ca1512 100644
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -661,7 +661,7 @@ x = (
["f'{3)+(4}'",
])
- self.assertAllRaise(SyntaxError, 'EOL while scanning string literal',
+ self.assertAllRaise(SyntaxError, 'unterminated string literal',
["f'{\n}'",
])
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-01-20-22-31-01.bpo-40176.anjyWw.rst b/Misc/NEWS.d/next/Core and Builtins/2021-01-20-22-31-01.bpo-40176.anjyWw.rst
new file mode 100644
index 0000000..df7de3b
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-01-20-22-31-01.bpo-40176.anjyWw.rst
@@ -0,0 +1,2 @@
+Syntax errors for unterminated string literals now point to the start
+of the string instead of reporting EOF/EOL.
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 0d39030..0e7f86b 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -327,12 +327,6 @@ tokenizer_error(Parser *p)
case E_TOKEN:
msg = "invalid token";
break;
- case E_EOFS:
- RAISE_SYNTAX_ERROR("EOF while scanning triple-quoted string literal");
- return -1;
- case E_EOLS:
- RAISE_SYNTAX_ERROR("EOL while scanning string literal");
- return -1;
case E_EOF:
if (p->tok->level) {
raise_unclosed_parentheses_error(p);
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index d3e846c..d9334aa 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1739,20 +1739,26 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
/* Get rest of string */
while (end_quote_size != quote_size) {
c = tok_nextc(tok);
- if (c == EOF) {
+ if (c == EOF || (quote_size == 1 && c == '\n')) {
+ // shift the tok_state's location into
+ // the start of string, and report the error
+ // from the initial quote character
+ tok->cur = (char *)tok->start;
+ tok->cur++;
+ tok->line_start = tok->multi_line_start;
+ int start = tok->lineno;
+ tok->lineno = tok->first_lineno;
+
if (quote_size == 3) {
- tok->done = E_EOFS;
+ return syntaxerror(tok,
+ "unterminated triple-quoted string literal"
+ " (detected at line %d)", start);
}
else {
- tok->done = E_EOLS;
+ return syntaxerror(tok,
+ "unterminated string literal (detected at"
+ " line %d)", start);
}
- tok->cur = tok->inp;
- return ERRORTOKEN;
- }
- if (quote_size == 1 && c == '\n') {
- tok->done = E_EOLS;
- tok->cur = tok->inp;
- return ERRORTOKEN;
}
if (c == quote) {
end_quote_size += 1;