From 94cf308ee231bfbfaa9ddc50b9764545a1318773 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Mon, 17 Dec 2018 17:34:14 +0200
Subject: bpo-33306: Improve SyntaxError messages for unbalanced parentheses.
 (GH-6516)

---
 Lib/test/test_fstring.py                           | 12 +++++---
 Lib/test/test_site.py                              |  4 +--
 .../2018-04-18-12-23-30.bpo-33306.tSM3cp.rst       |  1 +
 Parser/tokenizer.c                                 | 32 ++++++++++++++++++++++
 Parser/tokenizer.h                                 |  5 +++-
 5 files changed, 47 insertions(+), 7 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Core and Builtins/2018-04-18-12-23-30.bpo-33306.tSM3cp.rst
diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
index 09b5ae1..fe3804b 100644
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -1004,10 +1004,14 @@ non-important content
         self.assertEqual('{d[0]}'.format(d=d), 'integer')
 
     def test_invalid_expressions(self):
-        self.assertAllRaise(SyntaxError, 'invalid syntax',
-                            [r"f'{a[4)}'",
-                             r"f'{a(4]}'",
-                            ])
+        self.assertAllRaise(SyntaxError,
+                            r"closing parenthesis '\)' does not match "
+                            r"opening parenthesis '\[' \(<fstring>, line 1\)",
+                            [r"f'{a[4)}'"])
+        self.assertAllRaise(SyntaxError,
+                            r"closing parenthesis '\]' does not match "
+                            r"opening parenthesis '\(' \(<fstring>, line 1\)",
+                            [r"f'{a(4]}'"])
 
     def test_errors(self):
         # see issue 26287
diff --git a/Lib/test/test_site.py b/Lib/test/test_site.py
index f38e8d8..735651e 100644
--- a/Lib/test/test_site.py
+++ b/Lib/test/test_site.py
@@ -133,7 +133,7 @@ class HelperFunctionsTests(unittest.TestCase):
 
     def test_addpackage_import_bad_syntax(self):
         # Issue 10642
-        pth_dir, pth_fn = self.make_pth("import bad)syntax\n")
+        pth_dir, pth_fn = self.make_pth("import bad-syntax\n")
         with captured_stderr() as err_out:
             site.addpackage(pth_dir, pth_fn, set())
         self.assertRegex(err_out.getvalue(), "line 1")
@@ -143,7 +143,7 @@ class HelperFunctionsTests(unittest.TestCase):
         # order doesn't matter.  The next three could be a single check
         # but my regex foo isn't good enough to write it.
         self.assertRegex(err_out.getvalue(), 'Traceback')
-        self.assertRegex(err_out.getvalue(), r'import bad\)syntax')
+        self.assertRegex(err_out.getvalue(), r'import bad-syntax')
         self.assertRegex(err_out.getvalue(), 'SyntaxError')
 
     def test_addpackage_import_bad_exec(self):
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-04-18-12-23-30.bpo-33306.tSM3cp.rst b/Misc/NEWS.d/next/Core and Builtins/2018-04-18-12-23-30.bpo-33306.tSM3cp.rst
new file mode 100644
index 0000000..2d89106
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-04-18-12-23-30.bpo-33306.tSM3cp.rst	
@@ -0,0 +1 @@
+Improved syntax error messages for unbalanced parentheses.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index d319a4c..c246ee2 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1842,12 +1842,44 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
     case '(':
     case '[':
     case '{':
+#ifndef PGEN
+        if (tok->level >= MAXLEVEL) {
+            return syntaxerror(tok, "too many nested parentheses");
+        }
+        tok->parenstack[tok->level] = c;
+        tok->parenlinenostack[tok->level] = tok->lineno;
+#endif
         tok->level++;
         break;
     case ')':
     case ']':
     case '}':
+#ifndef PGEN
+        if (!tok->level) {
+            return syntaxerror(tok, "unmatched '%c'", c);
+        }
+#endif
         tok->level--;
+#ifndef PGEN
+        int opening = tok->parenstack[tok->level];
+        if (!((opening == '(' && c == ')') ||
+              (opening == '[' && c == ']') ||
+              (opening == '{' && c == '}')))
+        {
+            if (tok->parenlinenostack[tok->level] != tok->lineno) {
+                return syntaxerror(tok,
+                        "closing parenthesis '%c' does not match "
+                        "opening parenthesis '%c' on line %d",
+                        c, opening, tok->parenlinenostack[tok->level]);
+            }
+            else {
+                return syntaxerror(tok,
+                        "closing parenthesis '%c' does not match "
+                        "opening parenthesis '%c'",
+                        c, opening);
+            }
+        }
+#endif
         break;
     }
 
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 2e31d86..cd18d25 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -11,6 +11,7 @@ extern "C" {
 #include "token.h"      /* For token types */
 
 #define MAXINDENT 100   /* Max indentation level */
+#define MAXLEVEL 200    /* Max parentheses level */
 
 enum decoding_state {
     STATE_INIT,
@@ -39,14 +40,16 @@ struct tok_state {
     int lineno;         /* Current line number */
     int level;          /* () [] {} Parentheses nesting level */
             /* Used to allow free continuations inside them */
-    /* Stuff for checking on different tab sizes */
 #ifndef PGEN
+    char parenstack[MAXLEVEL];
+    int parenlinenostack[MAXLEVEL];
     /* pgen doesn't have access to Python codecs, it cannot decode the input
        filename. The bytes filename might be kept, but it is only used by
        indenterror() and it is not really needed: pgen only compiles one file
        (Grammar/Grammar). */
     PyObject *filename;
 #endif
+    /* Stuff for checking on different tab sizes */
     int altindstack[MAXINDENT];         /* Stack of alternate indents */
     /* Stuff for PEP 0263 */
     enum decoding_state decoding_state;
-- 
cgit v0.12