From cb0748d3939c31168ab5d3b80e3677494497d5e3 Mon Sep 17 00:00:00 2001 From: tyomitch Date: Wed, 3 Apr 2019 08:12:07 +0300 Subject: bpo-36440: include node names in ParserError messages, instead of numeric IDs (GH-12565) The error messages in the parser module are referring to numeric IDs for the nodes. To improve readability, use the node names when reporting errors. --- Lib/test/test_parser.py | 16 ++++++++++++++++ .../2019-03-25-13-45-19.bpo-36440.gkvzhi.rst | 2 ++ Modules/parsermodule.c | 17 +++++++++++------ 3 files changed, 29 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2019-03-25-13-45-19.bpo-36440.gkvzhi.rst diff --git a/Lib/test/test_parser.py b/Lib/test/test_parser.py index bfa0a5a..ff587c3 100644 --- a/Lib/test/test_parser.py +++ b/Lib/test/test_parser.py @@ -749,6 +749,22 @@ class IllegalSyntaxTestCase(unittest.TestCase): with self.assertRaises(UnicodeEncodeError): parser.sequence2st(tree) + def test_invalid_node_id(self): + tree = (257, (269, (-7, ''))) + self.check_bad_tree(tree, "negative node id") + tree = (257, (269, (99, ''))) + self.check_bad_tree(tree, "invalid token id") + tree = (257, (269, (9999, (0, '')))) + self.check_bad_tree(tree, "invalid symbol id") + + def test_ParserError_message(self): + try: + parser.sequence2st((257,(269,(257,(0,''))))) + except parser.ParserError as why: + self.assertIn("compound_stmt", str(why)) # Expected + self.assertIn("file_input", str(why)) # Got + + class CompileTestCase(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-03-25-13-45-19.bpo-36440.gkvzhi.rst b/Misc/NEWS.d/next/Core and Builtins/2019-03-25-13-45-19.bpo-36440.gkvzhi.rst new file mode 100644 index 0000000..372b1f7 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2019-03-25-13-45-19.bpo-36440.gkvzhi.rst @@ -0,0 +1,2 @@ +Include node names in ``ParserError`` messages, instead of numeric IDs. +Patch by A. Skrobov. diff --git a/Modules/parsermodule.c b/Modules/parsermodule.c index fd330b5..a215c7e 100644 --- a/Modules/parsermodule.c +++ b/Modules/parsermodule.c @@ -24,10 +24,6 @@ * Py_[X]DECREF() and Py_[X]INCREF() macros. The lint annotations * look like "NOTE(...)". * - * To debug parser errors like - * "parser.ParserError: Expected node type 12, got 333." - * decode symbol numbers using the automatically-generated files - * Lib/symbol.h and Include/token.h. */ #include "Python.h" /* general Python API */ @@ -666,6 +662,13 @@ validate_node(node *tree) for (pos = 0; pos < nch; ++pos) { node *ch = CHILD(tree, pos); int ch_type = TYPE(ch); + if ((ch_type >= NT_OFFSET + _PyParser_Grammar.g_ndfas) + || (ISTERMINAL(ch_type) && (ch_type >= N_TOKENS)) + || (ch_type < 0) + ) { + PyErr_Format(parser_error, "Unrecognized node type %d.", ch_type); + return 0; + } if (ch_type == suite && TYPE(tree) == funcdef) { /* This is the opposite hack of what we do in parser.c (search for func_body_suite), except we don't ever @@ -700,8 +703,10 @@ validate_node(node *tree) const char *expected_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str; if (ISNONTERMINAL(next_type)) { - PyErr_Format(parser_error, "Expected node type %d, got %d.", - next_type, ch_type); + PyErr_Format(parser_error, "Expected %s, got %s.", + _PyParser_Grammar.g_dfa[next_type - NT_OFFSET].d_name, + ISTERMINAL(ch_type) ? _PyParser_TokenNames[ch_type] : + _PyParser_Grammar.g_dfa[ch_type - NT_OFFSET].d_name); } else if (expected_str != NULL) { PyErr_Format(parser_error, "Illegal terminal: expected '%s'.", -- cgit v0.12