summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMiss Islington (bot) <31488909+miss-islington@users.noreply.github.com>2020-05-25 20:11:36 (GMT)
committerGitHub <noreply@github.com>2020-05-25 20:11:36 (GMT)
commit3c6c86ab77464e6bcb489064d0ec1be5d1b19f3a (patch)
tree59defc555399f9962e7de0834096672c6f548de1
parent318a18eb889e8733ffb25ada139fdd423606a609 (diff)
downloadcpython-3c6c86ab77464e6bcb489064d0ec1be5d1b19f3a.zip
cpython-3c6c86ab77464e6bcb489064d0ec1be5d1b19f3a.tar.gz
cpython-3c6c86ab77464e6bcb489064d0ec1be5d1b19f3a.tar.bz2
bpo-40688: Use the correct parser in the peg_generator scripts (GH-20235)
The scripts in `Tools/peg_generator/scripts` mostly assume that `ast.parse` and `compile` use the old parser, since that was the state of things while we were developing them. They need to be updated to always use the correct parser. `_peg_parser` is being extended to support both parsing and compiling with either parser. (cherry picked from commit 9645930b5bc1833ef495891d22052d1ba65ab7ea) Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com>
-rw-r--r--Modules/_peg_parser.c133
-rw-r--r--Tools/peg_generator/Makefile24
-rw-r--r--Tools/peg_generator/scripts/benchmark.py41
-rwxr-xr-xTools/peg_generator/scripts/show_parse.py18
-rwxr-xr-xTools/peg_generator/scripts/test_parse_directory.py80
-rwxr-xr-xTools/peg_generator/scripts/test_pypi_packages.py12
6 files changed, 136 insertions, 172 deletions
diff --git a/Modules/_peg_parser.c b/Modules/_peg_parser.c
index 3b27b2c..b66d5a8 100644
--- a/Modules/_peg_parser.c
+++ b/Modules/_peg_parser.c
@@ -1,104 +1,133 @@
#include <Python.h>
#include "pegen_interface.h"
-PyObject *
-_Py_parse_file(PyObject *self, PyObject *args, PyObject *kwds)
+static int
+_mode_str_to_int(char *mode_str)
{
- static char *keywords[] = {"file", "mode", NULL};
- char *filename;
- char *mode_str = "exec";
-
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|s", keywords, &filename, &mode_str)) {
- return NULL;
- }
-
int mode;
if (strcmp(mode_str, "exec") == 0) {
mode = Py_file_input;
}
+ else if (strcmp(mode_str, "eval") == 0) {
+ mode = Py_eval_input;
+ }
else if (strcmp(mode_str, "single") == 0) {
mode = Py_single_input;
}
else {
- return PyErr_Format(PyExc_ValueError, "mode must be either 'exec' or 'single'");
+ mode = -1;
}
+ return mode;
+}
- PyArena *arena = PyArena_New();
- if (arena == NULL) {
+static mod_ty
+_run_parser(char *str, char *filename, int mode, PyCompilerFlags *flags, PyArena *arena, int oldparser)
+{
+ mod_ty mod;
+ if (!oldparser) {
+ mod = PyPegen_ASTFromString(str, filename, mode, flags, arena);
+ }
+ else {
+ mod = PyParser_ASTFromString(str, filename, mode, flags, arena);
+ }
+ return mod;
+}
+
+PyObject *
+_Py_compile_string(PyObject *self, PyObject *args, PyObject *kwds)
+{
+ static char *keywords[] = {"string", "filename", "mode", "oldparser", NULL};
+ char *the_string;
+ char *filename = "<string>";
+ char *mode_str = "exec";
+ int oldparser = 0;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|ssp", keywords,
+ &the_string, &filename, &mode_str, &oldparser)) {
return NULL;
}
+ int mode = _mode_str_to_int(mode_str);
+ if (mode == -1) {
+ return PyErr_Format(PyExc_ValueError, "mode must be either 'exec' or 'eval' or 'single'");
+ }
+
PyCompilerFlags flags = _PyCompilerFlags_INIT;
- PyObject *result = NULL;
+ flags.cf_flags = PyCF_IGNORE_COOKIE;
- mod_ty res = PyPegen_ASTFromFilename(filename, mode, &flags, arena);
- if (res == NULL) {
- goto error;
+ PyArena *arena = PyArena_New();
+ if (arena == NULL) {
+ return NULL;
+ }
+
+ mod_ty mod = _run_parser(the_string, filename, mode, &flags, arena, oldparser);
+ if (mod == NULL) {
+ PyArena_Free(arena);
+ return NULL;
}
- result = PyAST_mod2obj(res);
-error:
+ PyObject *filename_ob = PyUnicode_DecodeFSDefault(filename);
+ if (filename_ob == NULL) {
+ PyArena_Free(arena);
+ return NULL;
+ }
+ PyCodeObject *result = PyAST_CompileObject(mod, filename_ob, &flags, -1, arena);
+ Py_XDECREF(filename_ob);
PyArena_Free(arena);
- return result;
+ return (PyObject *)result;
}
PyObject *
_Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds)
{
- static char *keywords[] = {"string", "mode", "oldparser", NULL};
+ static char *keywords[] = {"string", "filename", "mode", "oldparser", NULL};
char *the_string;
+ char *filename = "<string>";
char *mode_str = "exec";
int oldparser = 0;
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|sp", keywords,
- &the_string, &mode_str, &oldparser)) {
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|ssp", keywords,
+ &the_string, &filename, &mode_str, &oldparser)) {
return NULL;
}
- int mode;
- if (strcmp(mode_str, "exec") == 0) {
- mode = Py_file_input;
- }
- else if (strcmp(mode_str, "eval") == 0) {
- mode = Py_eval_input;
- }
- else if (strcmp(mode_str, "single") == 0) {
- mode = Py_single_input;
- }
- else {
+ int mode = _mode_str_to_int(mode_str);
+ if (mode == -1) {
return PyErr_Format(PyExc_ValueError, "mode must be either 'exec' or 'eval' or 'single'");
}
+ PyCompilerFlags flags = _PyCompilerFlags_INIT;
+ flags.cf_flags = PyCF_IGNORE_COOKIE;
+
PyArena *arena = PyArena_New();
if (arena == NULL) {
return NULL;
}
- PyObject *result = NULL;
-
- PyCompilerFlags flags = _PyCompilerFlags_INIT;
- flags.cf_flags = PyCF_IGNORE_COOKIE;
-
- mod_ty res;
- if (oldparser) {
- res = PyParser_ASTFromString(the_string, "<string>", mode, &flags, arena);
- }
- else {
- res = PyPegen_ASTFromString(the_string, "<string>", mode, &flags, arena);
- }
- if (res == NULL) {
- goto error;
+ mod_ty mod = _run_parser(the_string, filename, mode, &flags, arena, oldparser);
+ if (mod == NULL) {
+ PyArena_Free(arena);
+ return NULL;
}
- result = PyAST_mod2obj(res);
-error:
+ PyObject *result = PyAST_mod2obj(mod);
PyArena_Free(arena);
return result;
}
static PyMethodDef ParseMethods[] = {
- {"parse_file", (PyCFunction)(void (*)(void))_Py_parse_file, METH_VARARGS|METH_KEYWORDS, "Parse a file."},
- {"parse_string", (PyCFunction)(void (*)(void))_Py_parse_string, METH_VARARGS|METH_KEYWORDS,"Parse a string."},
+ {
+ "parse_string",
+ (PyCFunction)(void (*)(void))_Py_parse_string,
+ METH_VARARGS|METH_KEYWORDS,
+ "Parse a string, return an AST."
+ },
+ {
+ "compile_string",
+ (PyCFunction)(void (*)(void))_Py_compile_string,
+ METH_VARARGS|METH_KEYWORDS,
+ "Compile a string, return a code object."
+ },
{NULL, NULL, 0, NULL} /* Sentinel */
};
diff --git a/Tools/peg_generator/Makefile b/Tools/peg_generator/Makefile
index 34763b5..e7a190c 100644
--- a/Tools/peg_generator/Makefile
+++ b/Tools/peg_generator/Makefile
@@ -69,25 +69,22 @@ stats: peg_extension/parse.c data/xxl.py
time: time_compile
-time_compile: venv peg_extension/parse.c data/xxl.py
+time_compile: venv data/xxl.py
$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl compile
-time_parse: venv peg_extension/parse.c data/xxl.py
+time_parse: venv data/xxl.py
$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl parse
-time_check: venv peg_extension/parse.c data/xxl.py
- $(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl check
+time_old: time_old_compile
-time_stdlib: time_stdlib_compile
-
-time_stdlib_compile: venv peg_extension/parse.c data/xxl.py
+time_old_compile: venv data/xxl.py
$(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl compile
-time_stdlib_parse: venv peg_extension/parse.c data/xxl.py
+time_old_parse: venv data/xxl.py
$(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl parse
-test_local:
- $(PYTHON) scripts/test_parse_directory.py \
+time_peg_dir: venv
+ $(VENVPYTHON) scripts/test_parse_directory.py \
--grammar-file $(GRAMMAR) \
--tokens-file $(TOKENS) \
-d $(TESTDIR) \
@@ -96,8 +93,8 @@ test_local:
--exclude "*/failset/**" \
--exclude "*/failset/**/*"
-test_global: $(CPYTHON)
- $(PYTHON) scripts/test_parse_directory.py \
+time_stdlib: $(CPYTHON) venv
+ $(VENVPYTHON) scripts/test_parse_directory.py \
--grammar-file $(GRAMMAR) \
--tokens-file $(TOKENS) \
-d $(CPYTHON) \
@@ -113,9 +110,6 @@ mypy: regen-metaparser
format-python:
black pegen scripts
-bench: venv
- $(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=stdlib check
-
format: format-python
find_max_nesting:
diff --git a/Tools/peg_generator/scripts/benchmark.py b/Tools/peg_generator/scripts/benchmark.py
index 4942b99..71512c2 100644
--- a/Tools/peg_generator/scripts/benchmark.py
+++ b/Tools/peg_generator/scripts/benchmark.py
@@ -6,6 +6,8 @@ import sys
import os
from time import time
+import _peg_parser
+
try:
import memory_profiler
except ModuleNotFoundError:
@@ -14,8 +16,6 @@ except ModuleNotFoundError:
sys.exit(1)
sys.path.insert(0, os.getcwd())
-from peg_extension import parse
-from pegen.build import build_c_parser_and_generator
from scripts.test_parse_directory import parse_directory
argparser = argparse.ArgumentParser(
@@ -41,9 +41,6 @@ command_compile = subcommands.add_parser(
"compile", help="Benchmark parsing and compiling to bytecode"
)
command_parse = subcommands.add_parser("parse", help="Benchmark parsing and generating an ast.AST")
-command_check = subcommands.add_parser(
- "check", help="Benchmark parsing and throwing the tree away"
-)
def benchmark(func):
@@ -66,22 +63,20 @@ def benchmark(func):
@benchmark
def time_compile(source, parser):
if parser == "cpython":
- return compile(source, os.path.join("data", "xxl.py"), "exec")
+ return _peg_parser.compile_string(
+ source,
+ oldparser=True,
+ )
else:
- return parse.parse_string(source, mode=2)
+ return _peg_parser.compile_string(source)
@benchmark
def time_parse(source, parser):
if parser == "cpython":
- return ast.parse(source, os.path.join("data", "xxl.py"), "exec")
+ return _peg_parser.parse_string(source, oldparser=True)
else:
- return parse.parse_string(source, mode=1)
-
-
-@benchmark
-def time_check(source):
- return parse.parse_string(source, mode=0)
+ return _peg_parser.parse_string(source)
def run_benchmark_xxl(subcommand, parser, source):
@@ -89,32 +84,20 @@ def run_benchmark_xxl(subcommand, parser, source):
time_compile(source, parser)
elif subcommand == "parse":
time_parse(source, parser)
- elif subcommand == "check":
- time_check(source)
def run_benchmark_stdlib(subcommand, parser):
- modes = {"compile": 2, "parse": 1, "check": 0}
- extension = None
- if parser == "pegen":
- extension = build_c_parser_and_generator(
- "../../Grammar/python.gram",
- "../../Grammar/Tokens",
- "peg_extension/parse.c",
- compile_extension=True,
- skip_actions=False,
- )
for _ in range(3):
parse_directory(
"../../Lib",
"../../Grammar/python.gram",
+ "../../Grammar/Tokens",
verbose=False,
excluded_files=["*/bad*", "*/lib2to3/tests/data/*",],
skip_actions=False,
tree_arg=0,
short=True,
- extension=extension,
- mode=modes[subcommand],
+ mode=2 if subcommand == "compile" else 1,
parser=parser,
)
@@ -127,8 +110,6 @@ def main():
if subcommand is None:
argparser.error("A benchmark to run is required")
- if subcommand == "check" and parser == "cpython":
- argparser.error("Cannot use check target with the CPython parser")
if target == "xxl":
with open(os.path.join("data", "xxl.py"), "r") as f:
diff --git a/Tools/peg_generator/scripts/show_parse.py b/Tools/peg_generator/scripts/show_parse.py
index 1a0410e..1c1996f 100755
--- a/Tools/peg_generator/scripts/show_parse.py
+++ b/Tools/peg_generator/scripts/show_parse.py
@@ -30,6 +30,8 @@ import os
import sys
import tempfile
+import _peg_parser
+
from typing import List
sys.path.insert(0, os.getcwd())
@@ -72,7 +74,7 @@ def diff_trees(a: ast.AST, b: ast.AST, verbose: bool = False) -> List[str]:
def show_parse(source: str, verbose: bool = False) -> str:
- tree = ast.parse(source)
+ tree = _peg_parser.parse_string(source, oldparser=True)
return format_tree(tree, verbose).rstrip("\n")
@@ -90,17 +92,11 @@ def main() -> None:
sep = " "
program = sep.join(args.program)
if args.grammar_file:
- sys.path.insert(0, os.curdir)
- from pegen.build import build_parser_and_generator
-
- build_parser_and_generator(args.grammar_file, "peg_parser/parse.c", compile_extension=True)
- from pegen.parse import parse_string # type: ignore[import]
-
- tree = parse_string(program, mode=1)
+ tree = _peg_parser.parse_string(program)
if args.diff:
a = tree
- b = ast.parse(program)
+ b = _peg_parser.parse_string(program, oldparser=True)
diff = diff_trees(a, b, args.verbose)
if diff:
for line in diff:
@@ -111,8 +107,8 @@ def main() -> None:
print(f"# Parsed using {args.grammar_file}")
print(format_tree(tree, args.verbose))
else:
- tree = ast.parse(program)
- print("# Parse using ast.parse()")
+ tree = _peg_parser.parse_string(program, oldparser=True)
+ print("# Parse using the old parser")
print(format_tree(tree, args.verbose))
diff --git a/Tools/peg_generator/scripts/test_parse_directory.py b/Tools/peg_generator/scripts/test_parse_directory.py
index aef9c74..e88afe1 100755
--- a/Tools/peg_generator/scripts/test_parse_directory.py
+++ b/Tools/peg_generator/scripts/test_parse_directory.py
@@ -6,13 +6,14 @@ import os
import sys
import time
import traceback
+import tokenize
+import _peg_parser
from glob import glob
from pathlib import PurePath
from typing import List, Optional, Any
sys.path.insert(0, os.getcwd())
-from pegen.build import build_c_parser_and_generator
from pegen.ast_dump import ast_dump
from pegen.testutil import print_memstats
from scripts import show_parse
@@ -83,7 +84,7 @@ def compare_trees(
actual_tree: ast.AST, file: str, verbose: bool, include_attributes: bool = False,
) -> int:
with open(file) as f:
- expected_tree = ast.parse(f.read())
+ expected_tree = _peg_parser.parse_string(f.read(), oldparser=True)
expected_text = ast_dump(expected_tree, include_attributes=include_attributes)
actual_text = ast_dump(actual_tree, include_attributes=include_attributes)
@@ -121,7 +122,6 @@ def parse_directory(
skip_actions: bool,
tree_arg: int,
short: bool,
- extension: Any,
mode: int,
parser: str,
) -> int:
@@ -137,47 +137,21 @@ def parse_directory(
if not os.path.exists(grammar_file):
print(f"The specified grammar file, {grammar_file}, does not exist.", file=sys.stderr)
return 1
-
- try:
- if not extension and parser == "pegen":
- build_c_parser_and_generator(
- grammar_file,
- tokens_file,
- "peg_extension/parse.c",
- compile_extension=True,
- skip_actions=skip_actions,
- )
- except Exception as err:
- print(
- f"{FAIL}The following error occurred when generating the parser. Please check your grammar file.\n{ENDC}",
- file=sys.stderr,
- )
- traceback.print_exception(err.__class__, err, None)
-
- return 1
-
else:
print(
"A grammar file or a tokens file was not provided - attempting to use existing parser from stdlib...\n"
)
- if parser == "pegen":
- try:
- from peg_extension import parse # type: ignore
- except Exception as e:
- print(
- "An existing parser was not found. Please run `make` or specify a grammar file with the `-g` flag.",
- file=sys.stderr,
- )
- return 1
+ if tree_arg:
+ assert mode == 1, "Mode should be 1 (parse), when comparing the generated trees"
# For a given directory, traverse files and attempt to parse each one
# - Output success/failure for each file
errors = 0
files = []
trees = {} # Trees to compare (after everything else is done)
+ total_seconds = 0
- t0 = time.time()
for file in sorted(glob(f"{directory}/**/*.py", recursive=True)):
# Only attempt to parse Python files and files that are not excluded
should_exclude_file = False
@@ -187,25 +161,31 @@ def parse_directory(
break
if not should_exclude_file:
+ with tokenize.open(file) as f:
+ source = f.read()
try:
- if tree_arg:
- mode = 1
- if parser == "cpython":
- with open(file, "r") as f:
- source = f.read()
- if mode == 2:
- compile(source, file, "exec")
- elif mode == 1:
- ast.parse(source, file, "exec")
+ t0 = time.time()
+ if mode == 2:
+ result = _peg_parser.compile_string(
+ source,
+ filename=file,
+ oldparser=parser == "cpython",
+ )
else:
- tree = parse.parse_file(file, mode=mode)
+ result = _peg_parser.parse_string(
+ source,
+ filename=file,
+ oldparser=parser == "cpython"
+ )
+ t1 = time.time()
+ total_seconds += (t1 - t0)
if tree_arg:
- trees[file] = tree
+ trees[file] = result
if not short:
report_status(succeeded=True, file=file, verbose=verbose)
except Exception as error:
try:
- ast.parse(file)
+ _peg_parser.parse_string(source, mode="exec", oldparser=True)
except Exception:
if not short:
print(f"File {file} cannot be parsed by either pegen or the ast module.")
@@ -217,7 +197,6 @@ def parse_directory(
files.append(file)
t1 = time.time()
- total_seconds = t1 - t0
total_files = len(files)
total_bytes = 0
@@ -238,13 +217,6 @@ def parse_directory(
f"or {total_bytes / total_seconds :,.0f} bytes/sec.",
)
- if parser == "pegen":
- # Dump memo stats to @data.
- with open("@data", "w") as datafile:
- for i, count in enumerate(parse.get_memo_stats()):
- if count:
- datafile.write(f"{i:4d} {count:9d}\n")
-
if short:
print_memstats()
@@ -275,6 +247,7 @@ def main() -> None:
skip_actions = args.skip_actions
tree = args.tree
short = args.short
+ mode = 1 if args.tree else 2
sys.exit(
parse_directory(
directory,
@@ -285,8 +258,7 @@ def main() -> None:
skip_actions,
tree,
short,
- None,
- 0,
+ mode,
"pegen",
)
)
diff --git a/Tools/peg_generator/scripts/test_pypi_packages.py b/Tools/peg_generator/scripts/test_pypi_packages.py
index 7586b1a..98f7778 100755
--- a/Tools/peg_generator/scripts/test_pypi_packages.py
+++ b/Tools/peg_generator/scripts/test_pypi_packages.py
@@ -54,7 +54,7 @@ def find_dirname(package_name: str) -> str:
assert False # This is to fix mypy, should never be reached
-def run_tests(dirname: str, tree: int, extension: Any) -> int:
+def run_tests(dirname: str, tree: int) -> int:
return test_parse_directory.parse_directory(
dirname,
HERE / ".." / ".." / ".." / "Grammar" / "python.gram",
@@ -72,7 +72,6 @@ def run_tests(dirname: str, tree: int, extension: Any) -> int:
skip_actions=False,
tree_arg=tree,
short=True,
- extension=extension,
mode=1,
parser="pegen",
)
@@ -82,13 +81,6 @@ def main() -> None:
args = argparser.parse_args()
tree = args.tree
- extension = build.build_c_parser_and_generator(
- HERE / ".." / ".." / ".." / "Grammar" / "python.gram",
- HERE / ".." / ".." / ".." / "Grammar" / "Tokens",
- "peg_extension/parse.c",
- compile_extension=True,
- )
-
for package in get_packages():
print(f"Extracting files from {package}... ", end="")
try:
@@ -100,7 +92,7 @@ def main() -> None:
print(f"Trying to parse all python files ... ")
dirname = find_dirname(package)
- status = run_tests(dirname, tree, extension)
+ status = run_tests(dirname, tree)
if status == 0:
shutil.rmtree(dirname)
else: