diff options
author | Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> | 2020-05-25 20:11:36 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-05-25 20:11:36 (GMT) |
commit | 3c6c86ab77464e6bcb489064d0ec1be5d1b19f3a (patch) | |
tree | 59defc555399f9962e7de0834096672c6f548de1 | |
parent | 318a18eb889e8733ffb25ada139fdd423606a609 (diff) | |
download | cpython-3c6c86ab77464e6bcb489064d0ec1be5d1b19f3a.zip cpython-3c6c86ab77464e6bcb489064d0ec1be5d1b19f3a.tar.gz cpython-3c6c86ab77464e6bcb489064d0ec1be5d1b19f3a.tar.bz2 |
bpo-40688: Use the correct parser in the peg_generator scripts (GH-20235)
The scripts in `Tools/peg_generator/scripts` mostly assume that
`ast.parse` and `compile` use the old parser, since that was the
case while we were developing them. They need to be updated to
always use the correct parser. To that end, `_peg_parser` is being
extended to support both parsing and compiling with both parsers.
(cherry picked from commit 9645930b5bc1833ef495891d22052d1ba65ab7ea)
Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com>
-rw-r--r-- | Modules/_peg_parser.c | 133 | ||||
-rw-r--r-- | Tools/peg_generator/Makefile | 24 | ||||
-rw-r--r-- | Tools/peg_generator/scripts/benchmark.py | 41 | ||||
-rwxr-xr-x | Tools/peg_generator/scripts/show_parse.py | 18 | ||||
-rwxr-xr-x | Tools/peg_generator/scripts/test_parse_directory.py | 80 | ||||
-rwxr-xr-x | Tools/peg_generator/scripts/test_pypi_packages.py | 12 |
6 files changed, 136 insertions, 172 deletions
diff --git a/Modules/_peg_parser.c b/Modules/_peg_parser.c index 3b27b2c..b66d5a8 100644 --- a/Modules/_peg_parser.c +++ b/Modules/_peg_parser.c @@ -1,104 +1,133 @@ #include <Python.h> #include "pegen_interface.h" -PyObject * -_Py_parse_file(PyObject *self, PyObject *args, PyObject *kwds) +static int +_mode_str_to_int(char *mode_str) { - static char *keywords[] = {"file", "mode", NULL}; - char *filename; - char *mode_str = "exec"; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|s", keywords, &filename, &mode_str)) { - return NULL; - } - int mode; if (strcmp(mode_str, "exec") == 0) { mode = Py_file_input; } + else if (strcmp(mode_str, "eval") == 0) { + mode = Py_eval_input; + } else if (strcmp(mode_str, "single") == 0) { mode = Py_single_input; } else { - return PyErr_Format(PyExc_ValueError, "mode must be either 'exec' or 'single'"); + mode = -1; } + return mode; +} - PyArena *arena = PyArena_New(); - if (arena == NULL) { +static mod_ty +_run_parser(char *str, char *filename, int mode, PyCompilerFlags *flags, PyArena *arena, int oldparser) +{ + mod_ty mod; + if (!oldparser) { + mod = PyPegen_ASTFromString(str, filename, mode, flags, arena); + } + else { + mod = PyParser_ASTFromString(str, filename, mode, flags, arena); + } + return mod; +} + +PyObject * +_Py_compile_string(PyObject *self, PyObject *args, PyObject *kwds) +{ + static char *keywords[] = {"string", "filename", "mode", "oldparser", NULL}; + char *the_string; + char *filename = "<string>"; + char *mode_str = "exec"; + int oldparser = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|ssp", keywords, + &the_string, &filename, &mode_str, &oldparser)) { return NULL; } + int mode = _mode_str_to_int(mode_str); + if (mode == -1) { + return PyErr_Format(PyExc_ValueError, "mode must be either 'exec' or 'eval' or 'single'"); + } + PyCompilerFlags flags = _PyCompilerFlags_INIT; - PyObject *result = NULL; + flags.cf_flags = PyCF_IGNORE_COOKIE; - mod_ty res = PyPegen_ASTFromFilename(filename, mode, &flags, 
arena); - if (res == NULL) { - goto error; + PyArena *arena = PyArena_New(); + if (arena == NULL) { + return NULL; + } + + mod_ty mod = _run_parser(the_string, filename, mode, &flags, arena, oldparser); + if (mod == NULL) { + PyArena_Free(arena); + return NULL; } - result = PyAST_mod2obj(res); -error: + PyObject *filename_ob = PyUnicode_DecodeFSDefault(filename); + if (filename_ob == NULL) { + PyArena_Free(arena); + return NULL; + } + PyCodeObject *result = PyAST_CompileObject(mod, filename_ob, &flags, -1, arena); + Py_XDECREF(filename_ob); PyArena_Free(arena); - return result; + return (PyObject *)result; } PyObject * _Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds) { - static char *keywords[] = {"string", "mode", "oldparser", NULL}; + static char *keywords[] = {"string", "filename", "mode", "oldparser", NULL}; char *the_string; + char *filename = "<string>"; char *mode_str = "exec"; int oldparser = 0; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|sp", keywords, - &the_string, &mode_str, &oldparser)) { + if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|ssp", keywords, + &the_string, &filename, &mode_str, &oldparser)) { return NULL; } - int mode; - if (strcmp(mode_str, "exec") == 0) { - mode = Py_file_input; - } - else if (strcmp(mode_str, "eval") == 0) { - mode = Py_eval_input; - } - else if (strcmp(mode_str, "single") == 0) { - mode = Py_single_input; - } - else { + int mode = _mode_str_to_int(mode_str); + if (mode == -1) { return PyErr_Format(PyExc_ValueError, "mode must be either 'exec' or 'eval' or 'single'"); } + PyCompilerFlags flags = _PyCompilerFlags_INIT; + flags.cf_flags = PyCF_IGNORE_COOKIE; + PyArena *arena = PyArena_New(); if (arena == NULL) { return NULL; } - PyObject *result = NULL; - - PyCompilerFlags flags = _PyCompilerFlags_INIT; - flags.cf_flags = PyCF_IGNORE_COOKIE; - - mod_ty res; - if (oldparser) { - res = PyParser_ASTFromString(the_string, "<string>", mode, &flags, arena); - } - else { - res = 
PyPegen_ASTFromString(the_string, "<string>", mode, &flags, arena); - } - if (res == NULL) { - goto error; + mod_ty mod = _run_parser(the_string, filename, mode, &flags, arena, oldparser); + if (mod == NULL) { + PyArena_Free(arena); + return NULL; } - result = PyAST_mod2obj(res); -error: + PyObject *result = PyAST_mod2obj(mod); PyArena_Free(arena); return result; } static PyMethodDef ParseMethods[] = { - {"parse_file", (PyCFunction)(void (*)(void))_Py_parse_file, METH_VARARGS|METH_KEYWORDS, "Parse a file."}, - {"parse_string", (PyCFunction)(void (*)(void))_Py_parse_string, METH_VARARGS|METH_KEYWORDS,"Parse a string."}, + { + "parse_string", + (PyCFunction)(void (*)(void))_Py_parse_string, + METH_VARARGS|METH_KEYWORDS, + "Parse a string, return an AST." + }, + { + "compile_string", + (PyCFunction)(void (*)(void))_Py_compile_string, + METH_VARARGS|METH_KEYWORDS, + "Compile a string, return a code object." + }, {NULL, NULL, 0, NULL} /* Sentinel */ }; diff --git a/Tools/peg_generator/Makefile b/Tools/peg_generator/Makefile index 34763b5..e7a190c 100644 --- a/Tools/peg_generator/Makefile +++ b/Tools/peg_generator/Makefile @@ -69,25 +69,22 @@ stats: peg_extension/parse.c data/xxl.py time: time_compile -time_compile: venv peg_extension/parse.c data/xxl.py +time_compile: venv data/xxl.py $(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl compile -time_parse: venv peg_extension/parse.c data/xxl.py +time_parse: venv data/xxl.py $(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl parse -time_check: venv peg_extension/parse.c data/xxl.py - $(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl check +time_old: time_old_compile -time_stdlib: time_stdlib_compile - -time_stdlib_compile: venv peg_extension/parse.c data/xxl.py +time_old_compile: venv data/xxl.py $(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl compile -time_stdlib_parse: venv peg_extension/parse.c data/xxl.py +time_old_parse: venv data/xxl.py $(VENVPYTHON) 
scripts/benchmark.py --parser=cpython --target=xxl parse -test_local: - $(PYTHON) scripts/test_parse_directory.py \ +time_peg_dir: venv + $(VENVPYTHON) scripts/test_parse_directory.py \ --grammar-file $(GRAMMAR) \ --tokens-file $(TOKENS) \ -d $(TESTDIR) \ @@ -96,8 +93,8 @@ test_local: --exclude "*/failset/**" \ --exclude "*/failset/**/*" -test_global: $(CPYTHON) - $(PYTHON) scripts/test_parse_directory.py \ +time_stdlib: $(CPYTHON) venv + $(VENVPYTHON) scripts/test_parse_directory.py \ --grammar-file $(GRAMMAR) \ --tokens-file $(TOKENS) \ -d $(CPYTHON) \ @@ -113,9 +110,6 @@ mypy: regen-metaparser format-python: black pegen scripts -bench: venv - $(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=stdlib check - format: format-python find_max_nesting: diff --git a/Tools/peg_generator/scripts/benchmark.py b/Tools/peg_generator/scripts/benchmark.py index 4942b99..71512c2 100644 --- a/Tools/peg_generator/scripts/benchmark.py +++ b/Tools/peg_generator/scripts/benchmark.py @@ -6,6 +6,8 @@ import sys import os from time import time +import _peg_parser + try: import memory_profiler except ModuleNotFoundError: @@ -14,8 +16,6 @@ except ModuleNotFoundError: sys.exit(1) sys.path.insert(0, os.getcwd()) -from peg_extension import parse -from pegen.build import build_c_parser_and_generator from scripts.test_parse_directory import parse_directory argparser = argparse.ArgumentParser( @@ -41,9 +41,6 @@ command_compile = subcommands.add_parser( "compile", help="Benchmark parsing and compiling to bytecode" ) command_parse = subcommands.add_parser("parse", help="Benchmark parsing and generating an ast.AST") -command_check = subcommands.add_parser( - "check", help="Benchmark parsing and throwing the tree away" -) def benchmark(func): @@ -66,22 +63,20 @@ def benchmark(func): @benchmark def time_compile(source, parser): if parser == "cpython": - return compile(source, os.path.join("data", "xxl.py"), "exec") + return _peg_parser.compile_string( + source, + oldparser=True, + ) else: 
- return parse.parse_string(source, mode=2) + return _peg_parser.compile_string(source) @benchmark def time_parse(source, parser): if parser == "cpython": - return ast.parse(source, os.path.join("data", "xxl.py"), "exec") + return _peg_parser.parse_string(source, oldparser=True) else: - return parse.parse_string(source, mode=1) - - -@benchmark -def time_check(source): - return parse.parse_string(source, mode=0) + return _peg_parser.parse_string(source) def run_benchmark_xxl(subcommand, parser, source): @@ -89,32 +84,20 @@ def run_benchmark_xxl(subcommand, parser, source): time_compile(source, parser) elif subcommand == "parse": time_parse(source, parser) - elif subcommand == "check": - time_check(source) def run_benchmark_stdlib(subcommand, parser): - modes = {"compile": 2, "parse": 1, "check": 0} - extension = None - if parser == "pegen": - extension = build_c_parser_and_generator( - "../../Grammar/python.gram", - "../../Grammar/Tokens", - "peg_extension/parse.c", - compile_extension=True, - skip_actions=False, - ) for _ in range(3): parse_directory( "../../Lib", "../../Grammar/python.gram", + "../../Grammar/Tokens", verbose=False, excluded_files=["*/bad*", "*/lib2to3/tests/data/*",], skip_actions=False, tree_arg=0, short=True, - extension=extension, - mode=modes[subcommand], + mode=2 if subcommand == "compile" else 1, parser=parser, ) @@ -127,8 +110,6 @@ def main(): if subcommand is None: argparser.error("A benchmark to run is required") - if subcommand == "check" and parser == "cpython": - argparser.error("Cannot use check target with the CPython parser") if target == "xxl": with open(os.path.join("data", "xxl.py"), "r") as f: diff --git a/Tools/peg_generator/scripts/show_parse.py b/Tools/peg_generator/scripts/show_parse.py index 1a0410e..1c1996f 100755 --- a/Tools/peg_generator/scripts/show_parse.py +++ b/Tools/peg_generator/scripts/show_parse.py @@ -30,6 +30,8 @@ import os import sys import tempfile +import _peg_parser + from typing import List 
sys.path.insert(0, os.getcwd()) @@ -72,7 +74,7 @@ def diff_trees(a: ast.AST, b: ast.AST, verbose: bool = False) -> List[str]: def show_parse(source: str, verbose: bool = False) -> str: - tree = ast.parse(source) + tree = _peg_parser.parse_string(source, oldparser=True) return format_tree(tree, verbose).rstrip("\n") @@ -90,17 +92,11 @@ def main() -> None: sep = " " program = sep.join(args.program) if args.grammar_file: - sys.path.insert(0, os.curdir) - from pegen.build import build_parser_and_generator - - build_parser_and_generator(args.grammar_file, "peg_parser/parse.c", compile_extension=True) - from pegen.parse import parse_string # type: ignore[import] - - tree = parse_string(program, mode=1) + tree = _peg_parser.parse_string(program) if args.diff: a = tree - b = ast.parse(program) + b = _peg_parser.parse_string(program, oldparser=True) diff = diff_trees(a, b, args.verbose) if diff: for line in diff: @@ -111,8 +107,8 @@ def main() -> None: print(f"# Parsed using {args.grammar_file}") print(format_tree(tree, args.verbose)) else: - tree = ast.parse(program) - print("# Parse using ast.parse()") + tree = _peg_parser.parse_string(program, oldparser=True) + print("# Parse using the old parser") print(format_tree(tree, args.verbose)) diff --git a/Tools/peg_generator/scripts/test_parse_directory.py b/Tools/peg_generator/scripts/test_parse_directory.py index aef9c74..e88afe1 100755 --- a/Tools/peg_generator/scripts/test_parse_directory.py +++ b/Tools/peg_generator/scripts/test_parse_directory.py @@ -6,13 +6,14 @@ import os import sys import time import traceback +import tokenize +import _peg_parser from glob import glob from pathlib import PurePath from typing import List, Optional, Any sys.path.insert(0, os.getcwd()) -from pegen.build import build_c_parser_and_generator from pegen.ast_dump import ast_dump from pegen.testutil import print_memstats from scripts import show_parse @@ -83,7 +84,7 @@ def compare_trees( actual_tree: ast.AST, file: str, verbose: bool, 
include_attributes: bool = False, ) -> int: with open(file) as f: - expected_tree = ast.parse(f.read()) + expected_tree = _peg_parser.parse_string(f.read(), oldparser=True) expected_text = ast_dump(expected_tree, include_attributes=include_attributes) actual_text = ast_dump(actual_tree, include_attributes=include_attributes) @@ -121,7 +122,6 @@ def parse_directory( skip_actions: bool, tree_arg: int, short: bool, - extension: Any, mode: int, parser: str, ) -> int: @@ -137,47 +137,21 @@ def parse_directory( if not os.path.exists(grammar_file): print(f"The specified grammar file, {grammar_file}, does not exist.", file=sys.stderr) return 1 - - try: - if not extension and parser == "pegen": - build_c_parser_and_generator( - grammar_file, - tokens_file, - "peg_extension/parse.c", - compile_extension=True, - skip_actions=skip_actions, - ) - except Exception as err: - print( - f"{FAIL}The following error occurred when generating the parser. Please check your grammar file.\n{ENDC}", - file=sys.stderr, - ) - traceback.print_exception(err.__class__, err, None) - - return 1 - else: print( "A grammar file or a tokens file was not provided - attempting to use existing parser from stdlib...\n" ) - if parser == "pegen": - try: - from peg_extension import parse # type: ignore - except Exception as e: - print( - "An existing parser was not found. 
Please run `make` or specify a grammar file with the `-g` flag.", - file=sys.stderr, - ) - return 1 + if tree_arg: + assert mode == 1, "Mode should be 1 (parse), when comparing the generated trees" # For a given directory, traverse files and attempt to parse each one # - Output success/failure for each file errors = 0 files = [] trees = {} # Trees to compare (after everything else is done) + total_seconds = 0 - t0 = time.time() for file in sorted(glob(f"{directory}/**/*.py", recursive=True)): # Only attempt to parse Python files and files that are not excluded should_exclude_file = False @@ -187,25 +161,31 @@ def parse_directory( break if not should_exclude_file: + with tokenize.open(file) as f: + source = f.read() try: - if tree_arg: - mode = 1 - if parser == "cpython": - with open(file, "r") as f: - source = f.read() - if mode == 2: - compile(source, file, "exec") - elif mode == 1: - ast.parse(source, file, "exec") + t0 = time.time() + if mode == 2: + result = _peg_parser.compile_string( + source, + filename=file, + oldparser=parser == "cpython", + ) else: - tree = parse.parse_file(file, mode=mode) + result = _peg_parser.parse_string( + source, + filename=file, + oldparser=parser == "cpython" + ) + t1 = time.time() + total_seconds += (t1 - t0) if tree_arg: - trees[file] = tree + trees[file] = result if not short: report_status(succeeded=True, file=file, verbose=verbose) except Exception as error: try: - ast.parse(file) + _peg_parser.parse_string(source, mode="exec", oldparser=True) except Exception: if not short: print(f"File {file} cannot be parsed by either pegen or the ast module.") @@ -217,7 +197,6 @@ def parse_directory( files.append(file) t1 = time.time() - total_seconds = t1 - t0 total_files = len(files) total_bytes = 0 @@ -238,13 +217,6 @@ def parse_directory( f"or {total_bytes / total_seconds :,.0f} bytes/sec.", ) - if parser == "pegen": - # Dump memo stats to @data. 
- with open("@data", "w") as datafile: - for i, count in enumerate(parse.get_memo_stats()): - if count: - datafile.write(f"{i:4d} {count:9d}\n") - if short: print_memstats() @@ -275,6 +247,7 @@ def main() -> None: skip_actions = args.skip_actions tree = args.tree short = args.short + mode = 1 if args.tree else 2 sys.exit( parse_directory( directory, @@ -285,8 +258,7 @@ def main() -> None: skip_actions, tree, short, - None, - 0, + mode, "pegen", ) ) diff --git a/Tools/peg_generator/scripts/test_pypi_packages.py b/Tools/peg_generator/scripts/test_pypi_packages.py index 7586b1a..98f7778 100755 --- a/Tools/peg_generator/scripts/test_pypi_packages.py +++ b/Tools/peg_generator/scripts/test_pypi_packages.py @@ -54,7 +54,7 @@ def find_dirname(package_name: str) -> str: assert False # This is to fix mypy, should never be reached -def run_tests(dirname: str, tree: int, extension: Any) -> int: +def run_tests(dirname: str, tree: int) -> int: return test_parse_directory.parse_directory( dirname, HERE / ".." / ".." / ".." / "Grammar" / "python.gram", @@ -72,7 +72,6 @@ def run_tests(dirname: str, tree: int, extension: Any) -> int: skip_actions=False, tree_arg=tree, short=True, - extension=extension, mode=1, parser="pegen", ) @@ -82,13 +81,6 @@ def main() -> None: args = argparser.parse_args() tree = args.tree - extension = build.build_c_parser_and_generator( - HERE / ".." / ".." / ".." / "Grammar" / "python.gram", - HERE / ".." / ".." / ".." / "Grammar" / "Tokens", - "peg_extension/parse.c", - compile_extension=True, - ) - for package in get_packages(): print(f"Extracting files from {package}... ", end="") try: @@ -100,7 +92,7 @@ def main() -> None: print(f"Trying to parse all python files ... ") dirname = find_dirname(package) - status = run_tests(dirname, tree, extension) + status = run_tests(dirname, tree) if status == 0: shutil.rmtree(dirname) else: |