path: root/Tools/peg_generator/scripts/benchmark.py
#!/usr/bin/env python3.9
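"""Benchmark the pegen parser against the CPython parser.

Three benchmarks are available as subcommands: ``compile`` (parse and compile
to bytecode), ``parse`` (parse and build an ``ast.AST``) and ``check`` (parse
and throw the tree away).  Each can be run either against the large
``data/xxl.py`` file or against the whole standard library, using either the
pegen-generated C parser or the CPython parser.

The script is expected to be run from the ``Tools/peg_generator`` directory
(note the relative paths and the ``sys.path`` manipulation below), e.g.:

    ./scripts/benchmark.py --parser=pegen --target=xxl parse
"""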

import argparse
import ast
import sys
import os
from time import time

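# Third-party dependency; install it with "pip install memory_profiler".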
import memory_profiler

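# Make the local peg_extension and scripts packages importable; this assumes
# the script is run from the Tools/peg_generator directory.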
sys.path.insert(0, os.getcwd())
from peg_extension import parse
from pegen.build import build_c_parser_and_generator
from scripts.test_parse_directory import parse_directory

argparser = argparse.ArgumentParser(
    prog="benchmark", description="Reproduce the various pegen benchmarks"
)
argparser.add_argument(
    "--parser",
    action="store",
    choices=["pegen", "cpython"],
    default="pegen",
    help="Which parser to benchmark (default is pegen)",
)
argparser.add_argument(
    "--target",
    action="store",
    choices=["xxl", "stdlib"],
    default="xxl",
    help="Which target to use for the benchmark (default is xxl.py)",
)

subcommands = argparser.add_subparsers(title="Benchmarks", dest="subcommand")
command_compile = subcommands.add_parser(
    "compile", help="Benchmark parsing and compiling to bytecode"
)
command_parse = subcommands.add_parser("parse", help="Benchmark parsing and generating an ast.AST")
command_check = subcommands.add_parser(
    "check", help="Benchmark parsing and throwing the tree away"
)


def benchmark(func):
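    """Decorate a function so that it reports its wall-clock time, averaged
    over 3 runs, and its peak memory usage as measured by memory_profiler."""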
    def wrapper(*args):
        times = list()
        for _ in range(3):
            start = time()
            result = func(*args)
            end = time()
            times.append(end - start)
        memory = memory_profiler.memory_usage((func, args))
        print(f"{func.__name__}")
        print(f"\tTime: {sum(times)/3:.3f} seconds, averaged over 3 runs")
        print(f"\tMemory: {max(memory):.3f} MiB peak, measured over a single profiled run")
        return result

    return wrapper


@benchmark
def time_compile(source, parser):
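    """Parse *source* and compile it to bytecode with the selected parser."""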
    if parser == "cpython":
        return compile(source, os.path.join("data", "xxl.py"), "exec")
    else:
        return parse.parse_string(source, mode=2)


@benchmark
def time_parse(source, parser):
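    """Parse *source* into an ast.AST with the selected parser."""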
    if parser == "cpython":
        return ast.parse(source, os.path.join("data", "xxl.py"), "exec")
    else:
        return parse.parse_string(source, mode=1)


@benchmark
def time_check(source):
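    """Parse *source* with the pegen extension and throw the tree away."""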
    return parse.parse_string(source, mode=0)


def run_benchmark_xxl(subcommand, parser, source):
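    """Run the selected benchmark over the contents of data/xxl.py."""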
    if subcommand == "compile":
        time_compile(source, parser)
    elif subcommand == "parse":
        time_parse(source, parser)
    elif subcommand == "check":
        time_check(source)


def run_benchmark_stdlib(subcommand, parser):
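    """Run the selected benchmark over the standard library sources in ../../Lib.

    For the pegen parser, the C extension is built once up front; the
    directory is then parsed three times via parse_directory().
    """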
    modes = {"compile": 2, "parse": 1, "check": 0}
    extension = None
    if parser == "pegen":
        extension = build_c_parser_and_generator(
            "../../Grammar/python.gram",
            "../../Grammar/Tokens",
            "peg_extension/parse.c",
            compile_extension=True,
            skip_actions=False,
        )
    for _ in range(3):
        parse_directory(
            "../../Lib",
            "../../Grammar/python.gram",
            verbose=False,
            excluded_files=[
                "*/bad*",
                "*/lib2to3/tests/data/*",
            ],
            skip_actions=False,
            tree_arg=0,
            short=True,
            extension=extension,
            mode=modes[subcommand],
            parser=parser,
        )


def main():
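    """Parse the command line arguments and dispatch to the selected benchmark."""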
    args = argparser.parse_args()
    subcommand = args.subcommand
    parser = args.parser
    target = args.target

    if subcommand is None:
        argparser.error("A benchmark to run is required")
    if subcommand == "check" and parser == "cpython":
        argparser.error("Cannot use the check benchmark with the CPython parser")

    if target == "xxl":
        with open(os.path.join("data", "xxl.py"), "r") as f:
            source = f.read()
            run_benchmark_xxl(subcommand, parser, source)
    elif target == "stdlib":
        run_benchmark_stdlib(subcommand, parser)


if __name__ == "__main__":
    main()