Issue #2382: SyntaxError cursor "^" is now written at the correct position in most
cases when multibyte characters are in the line (before "^"). This still does not work correctly with wide East Asian characters.
author: Serhiy Storchaka <storchaka@gmail.com> 2014-01-21 20:29:47 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com> 2014-01-21 20:29:47 (GMT)
commit: 2bd59daf58431284e13f3e080a52cab032eb792f (patch)
tree: f84cc2dee484690fc0641850a7f64042583bb033
parent: 567b26e882a3a73f37f69390f3a34ec533ff4590 (diff)
parent: 65fd0592fb3845c17b27c441380553fc22f78812 (diff)
download: cpython-2bd59daf58431284e13f3e080a52cab032eb792f.zip
cpython-2bd59daf58431284e13f3e080a52cab032eb792f.tar.gz
cpython-2bd59daf58431284e13f3e080a52cab032eb792f.tar.bz2
4 files changed, 38 insertions, 2 deletions
diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index f0851bd..4770dfd 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -148,6 +148,19 @@ class ExceptionTests(unittest.TestCase):
         ckmsg(s, "'continue' not properly in loop")
         ckmsg("continue\n", "'continue' not properly in loop")
 
+    def testSyntaxErrorOffset(self):
+        def check(src, lineno, offset):
+            with self.assertRaises(SyntaxError) as cm:
+                compile(src, '<fragment>', 'exec')
+            self.assertEqual(cm.exception.lineno, lineno)
+            self.assertEqual(cm.exception.offset, offset)
+
+        check('def fact(x):\n\treturn x!\n', 2, 10)
+        check('1 +\n', 1, 4)
+        check('def spam():\n  print(1)\n print(2)', 3, 10)
+        check('Python = "Python" +', 1, 20)
+        check('Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +', 1, 20)
+
     @cpython_only
     def testSettingException(self):
         # test that setting an exception at the C level works even if the
diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
index 172f5b5..a6ae1e5 100644
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -32,6 +32,9 @@ class SyntaxTracebackCases(unittest.TestCase):
     def syntax_error_bad_indentation(self):
         compile("def spam():\n  print(1)\n print(2)", "?", "exec")
 
+    def syntax_error_with_caret_non_ascii(self):
+        compile('Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +', "?", "exec")
+
     def test_caret(self):
         err = self.get_exception_format(self.syntax_error_with_caret,
                                         SyntaxError)
@@ -46,6 +49,12 @@ class SyntaxTracebackCases(unittest.TestCase):
         self.assertTrue(err[2].count('\n') == 1) # and no additional newline
         self.assertTrue(err[1].find("+") == err[2].find("^")) # in the right place
 
+        err = self.get_exception_format(self.syntax_error_with_caret_non_ascii,
+                                        SyntaxError)
+        self.assertIn("^", err[2]) # third line has caret
+        self.assertTrue(err[2].count('\n') == 1) # and no additional newline
+        self.assertTrue(err[1].find("+") == err[2].find("^")) # in the right place
+
     def test_nocaret(self):
         exc = SyntaxError("error", ("x.py", 23, None, "bad syntax"))
         err = traceback.format_exception_only(SyntaxError, exc)
diff --git a/Misc/NEWS b/Misc/NEWS
index ad37fa5..d664046 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,10 @@ Release date: 2014-01-19
 Core and Builtins
 -----------------
 
+- Issue #2382: SyntaxError cursor "^" is now written at correct position in most
+  cases when multibyte characters are in line (before "^").  This still not
+  works correctly with wide East Asian characters.
+
 - Issue #18960: The first line of Python script could be executed twice when
   the source encoding was specified on the second line.  Now the source encoding
   declaration on the second line isn't effective if the first line contains
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index 97daecc..ff9569b 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -2470,6 +2470,7 @@ err_input(perrdetail *err)
     PyObject *v, *w, *errtype, *errtext;
     PyObject *msg_obj = NULL;
     char *msg = NULL;
+    int offset = err->offset;
 
     errtype = PyExc_SyntaxError;
     switch (err->error) {
@@ -2554,11 +2555,20 @@ err_input(perrdetail *err)
         errtext = Py_None;
         Py_INCREF(Py_None);
     } else {
-        errtext = PyUnicode_DecodeUTF8(err->text, strlen(err->text),
+        errtext = PyUnicode_DecodeUTF8(err->text, err->offset,
                                        "replace");
+        if (errtext != NULL) {
+            Py_ssize_t len = strlen(err->text);
+            offset = (int)PyUnicode_GET_LENGTH(errtext);
+            if (len != err->offset) {
+                Py_DECREF(errtext);
+                errtext = PyUnicode_DecodeUTF8(err->text, len,
+                                               "replace");
+            }
+        }
     }
     v = Py_BuildValue("(OiiN)", err->filename,
-                      err->lineno, err->offset, errtext);
+                      err->lineno, offset, errtext);
     if (v != NULL) {
         if (msg_obj)
             w = Py_BuildValue("(OO)", msg_obj, v);
author	Serhiy Storchaka <storchaka@gmail.com>	2014-01-21 20:29:47 (GMT)
committer	Serhiy Storchaka <storchaka@gmail.com>	2014-01-21 20:29:47 (GMT)
commit	2bd59daf58431284e13f3e080a52cab032eb792f (patch)
tree	f84cc2dee484690fc0641850a7f64042583bb033
parent	567b26e882a3a73f37f69390f3a34ec533ff4590 (diff)
parent	65fd0592fb3845c17b27c441380553fc22f78812 (diff)
download	cpython-2bd59daf58431284e13f3e080a52cab032eb792f.zip cpython-2bd59daf58431284e13f3e080a52cab032eb792f.tar.gz cpython-2bd59daf58431284e13f3e080a52cab032eb792f.tar.bz2