bpo-36440: include node names in ParserError messages, instead of numeric IDs (GH-12565)

The error messages in the parser module refer to nodes by their numeric IDs. To improve readability, use the node names instead when reporting errors.
author: tyomitch <tyomitch@gmail.com> 2019-04-03 05:12:07 (GMT)
committer: Pablo Galindo <Pablogsal@gmail.com> 2019-04-03 05:12:07 (GMT)
commit: cb0748d3939c31168ab5d3b80e3677494497d5e3 (patch)
tree: 8debb59a0158afc0dd194ea161ca2669e7bdcef7
parent: 76b387bf7402863c5e64e3459e2f91ddc3b9d2d3 (diff)
download: cpython-cb0748d3939c31168ab5d3b80e3677494497d5e3.zip
cpython-cb0748d3939c31168ab5d3b80e3677494497d5e3.tar.gz
cpython-cb0748d3939c31168ab5d3b80e3677494497d5e3.tar.bz2
3 files changed, 29 insertions, 6 deletions
diff --git a/Lib/test/test_parser.py b/Lib/test/test_parser.py
index bfa0a5a..ff587c3 100644
--- a/Lib/test/test_parser.py
+++ b/Lib/test/test_parser.py
@@ -749,6 +749,22 @@ class IllegalSyntaxTestCase(unittest.TestCase):
         with self.assertRaises(UnicodeEncodeError):
             parser.sequence2st(tree)
 
+    def test_invalid_node_id(self):
+        tree = (257, (269, (-7, '')))
+        self.check_bad_tree(tree, "negative node id")
+        tree = (257, (269, (99, '')))
+        self.check_bad_tree(tree, "invalid token id")
+        tree = (257, (269, (9999, (0, ''))))
+        self.check_bad_tree(tree, "invalid symbol id")
+
+    def test_ParserError_message(self):
+        try:
+            parser.sequence2st((257,(269,(257,(0,'')))))
+        except parser.ParserError as why:
+            self.assertIn("compound_stmt", str(why))  # Expected
+            self.assertIn("file_input", str(why))     # Got
+
+
 
 class CompileTestCase(unittest.TestCase):
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-03-25-13-45-19.bpo-36440.gkvzhi.rst b/Misc/NEWS.d/next/Core and Builtins/2019-03-25-13-45-19.bpo-36440.gkvzhi.rst
new file mode 100644
index 0000000..372b1f7
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2019-03-25-13-45-19.bpo-36440.gkvzhi.rst
@@ -0,0 +1,2 @@
+Include node names in ``ParserError`` messages, instead of numeric IDs.
+Patch by A. Skrobov.
diff --git a/Modules/parsermodule.c b/Modules/parsermodule.c
index fd330b5..a215c7e 100644
--- a/Modules/parsermodule.c
+++ b/Modules/parsermodule.c
@@ -24,10 +24,6 @@
  *  Py_[X]DECREF() and Py_[X]INCREF() macros.  The lint annotations
  *  look like "NOTE(...)".
  *
- *  To debug parser errors like
- *      "parser.ParserError: Expected node type 12, got 333."
- *  decode symbol numbers using the automatically-generated files
- *  Lib/symbol.h and Include/token.h.
  */
 
 #include "Python.h"                     /* general Python API             */
@@ -666,6 +662,13 @@ validate_node(node *tree)
     for (pos = 0; pos < nch; ++pos) {
         node *ch = CHILD(tree, pos);
         int ch_type = TYPE(ch);
+        if ((ch_type >= NT_OFFSET + _PyParser_Grammar.g_ndfas)
+            || (ISTERMINAL(ch_type) && (ch_type >= N_TOKENS))
+            || (ch_type < 0)
+           ) {
+            PyErr_Format(parser_error, "Unrecognized node type %d.", ch_type);
+            return 0;
+        }
         if (ch_type == suite && TYPE(tree) == funcdef) {
             /* This is the opposite hack of what we do in parser.c
                (search for func_body_suite), except we don't ever
@@ -700,8 +703,10 @@ validate_node(node *tree)
             const char *expected_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str;
 
             if (ISNONTERMINAL(next_type)) {
-                PyErr_Format(parser_error, "Expected node type %d, got %d.",
-                             next_type, ch_type);
+                PyErr_Format(parser_error, "Expected %s, got %s.",
+                             _PyParser_Grammar.g_dfa[next_type - NT_OFFSET].d_name,
+                             ISTERMINAL(ch_type) ? _PyParser_TokenNames[ch_type] :
+                             _PyParser_Grammar.g_dfa[ch_type - NT_OFFSET].d_name);
             }
             else if (expected_str != NULL) {
                 PyErr_Format(parser_error, "Illegal terminal: expected '%s'.",
author	tyomitch <tyomitch@gmail.com>	2019-04-03 05:12:07 (GMT)
committer	Pablo Galindo <Pablogsal@gmail.com>	2019-04-03 05:12:07 (GMT)
commit	cb0748d3939c31168ab5d3b80e3677494497d5e3 (patch)
tree	8debb59a0158afc0dd194ea161ca2669e7bdcef7
parent	76b387bf7402863c5e64e3459e2f91ddc3b9d2d3 (diff)
download	cpython-cb0748d3939c31168ab5d3b80e3677494497d5e3.zip cpython-cb0748d3939c31168ab5d3b80e3677494497d5e3.tar.gz cpython-cb0748d3939c31168ab5d3b80e3677494497d5e3.tar.bz2