summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2014-01-21 20:26:52 (GMT)
committerSerhiy Storchaka <storchaka@gmail.com>2014-01-21 20:26:52 (GMT)
commit65fd0592fb3845c17b27c441380553fc22f78812 (patch)
tree514cb4f7fb01bcc581922bee799c3bfbe58a9cd2
parentf7d2874d3097054e030f0169f5eed92af488acbe (diff)
downloadcpython-65fd0592fb3845c17b27c441380553fc22f78812.zip
cpython-65fd0592fb3845c17b27c441380553fc22f78812.tar.gz
cpython-65fd0592fb3845c17b27c441380553fc22f78812.tar.bz2
Issue #2382: SyntaxError cursor "^" is now written at the correct position in most
cases when multibyte characters are in the line (before "^"). This still does not work correctly with wide East Asian characters.
-rw-r--r--Lib/test/test_exceptions.py13
-rw-r--r--Lib/test/test_traceback.py9
-rw-r--r--Misc/NEWS4
-rw-r--r--Python/pythonrun.c14
4 files changed, 38 insertions, 2 deletions
diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index 1ad7f97..fe660bf 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -148,6 +148,19 @@ class ExceptionTests(unittest.TestCase):
ckmsg(s, "'continue' not properly in loop")
ckmsg("continue\n", "'continue' not properly in loop")
+ def testSyntaxErrorOffset(self):
+ def check(src, lineno, offset):
+ with self.assertRaises(SyntaxError) as cm:
+ compile(src, '<fragment>', 'exec')
+ self.assertEqual(cm.exception.lineno, lineno)
+ self.assertEqual(cm.exception.offset, offset)
+
+ check('def fact(x):\n\treturn x!\n', 2, 10)
+ check('1 +\n', 1, 4)
+ check('def spam():\n print(1)\n print(2)', 3, 10)
+ check('Python = "Python" +', 1, 20)
+ check('Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +', 1, 20)
+
@cpython_only
def testSettingException(self):
# test that setting an exception at the C level works even if the
diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
index bca825d..373d9af 100644
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -32,6 +32,9 @@ class SyntaxTracebackCases(unittest.TestCase):
def syntax_error_bad_indentation(self):
compile("def spam():\n print(1)\n print(2)", "?", "exec")
+ def syntax_error_with_caret_non_ascii(self):
+ compile('Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +', "?", "exec")
+
def test_caret(self):
err = self.get_exception_format(self.syntax_error_with_caret,
SyntaxError)
@@ -46,6 +49,12 @@ class SyntaxTracebackCases(unittest.TestCase):
self.assertTrue(err[2].count('\n') == 1) # and no additional newline
self.assertTrue(err[1].find("+") == err[2].find("^")) # in the right place
+ err = self.get_exception_format(self.syntax_error_with_caret_non_ascii,
+ SyntaxError)
+ self.assertIn("^", err[2]) # third line has caret
+ self.assertTrue(err[2].count('\n') == 1) # and no additional newline
+ self.assertTrue(err[1].find("+") == err[2].find("^")) # in the right place
+
def test_nocaret(self):
exc = SyntaxError("error", ("x.py", 23, None, "bad syntax"))
err = traceback.format_exception_only(SyntaxError, exc)
diff --git a/Misc/NEWS b/Misc/NEWS
index e470fa9..4e3aa45 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,10 @@ What's New in Python 3.3.4 release candidate 1?
Core and Builtins
-----------------
+- Issue #2382: SyntaxError cursor "^" is now written at correct position in most
+ cases when multibyte characters are in line (before "^"). This still does not
+ work correctly with wide East Asian characters.
+
- Issue #18960: The first line of Python script could be executed twice when
the source encoding was specified on the second line. Now the source encoding
declaration on the second line isn't effective if the first line contains
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index e02dbe2..91d56b7 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -2226,6 +2226,7 @@ err_input(perrdetail *err)
PyObject *v, *w, *errtype, *errtext;
PyObject *msg_obj = NULL;
char *msg = NULL;
+ int offset = err->offset;
errtype = PyExc_SyntaxError;
switch (err->error) {
@@ -2310,11 +2311,20 @@ err_input(perrdetail *err)
errtext = Py_None;
Py_INCREF(Py_None);
} else {
- errtext = PyUnicode_DecodeUTF8(err->text, strlen(err->text),
+ errtext = PyUnicode_DecodeUTF8(err->text, err->offset,
"replace");
+ if (errtext != NULL) {
+ Py_ssize_t len = strlen(err->text);
+ offset = (int)PyUnicode_GET_LENGTH(errtext);
+ if (len != err->offset) {
+ Py_DECREF(errtext);
+ errtext = PyUnicode_DecodeUTF8(err->text, len,
+ "replace");
+ }
+ }
}
v = Py_BuildValue("(OiiN)", err->filename,
- err->lineno, err->offset, errtext);
+ err->lineno, offset, errtext);
if (v != NULL) {
if (msg_obj)
w = Py_BuildValue("(OO)", msg_obj, v);