#!/usr/bin/env python3.8
# Tools/peg_generator/scripts/test_parse_directory.py

import argparse
import ast
import os
import sys
import time
import tokenize
from glob import glob, escape
from pathlib import PurePath

from typing import List, Optional, Any, Tuple

sys.path.insert(0, os.getcwd())
from pegen.testutil import print_memstats

# ANSI escape sequences wrapped around the per-file status line.
SUCCESS = "\033[92m"
FAIL = "\033[91m"
ENDC = "\033[0m"

# Numeric mode constants. Nothing in this file reads them.
# NOTE(review): presumably kept for other tooling that imports this module --
# confirm before removing.
NOTREE = 0
PARSE = 1
COMPILE = 2

# Command-line interface: one optional directory, repeatable exclusion globs,
# and two boolean output switches.
argparser = argparse.ArgumentParser(
    prog="test_parse_directory",
    description="Helper program to test directories or files for pegen",
)
argparser.add_argument(
    "-d",
    "--directory",
    help="Directory path containing files to test",
)
argparser.add_argument(
    "-e",
    "--exclude",
    action="append",
    default=[],
    help="Glob(s) for matching files to exclude",
)
argparser.add_argument(
    "-s",
    "--short",
    action="store_true",
    help="Only show errors, in a more Emacs-friendly format",
)
argparser.add_argument(
    "-v",
    "--verbose",
    action="store_true",
    help="Display detailed errors for failures",
)


def report_status(
    succeeded: bool,
    file: str,
    verbose: bool,
    error: Optional[Exception] = None,
    short: bool = False,
) -> None:
    """Print the outcome of checking a single file.

    In short mode nothing is printed for a success, and a failure is printed
    as one ``file:line:column: message`` line.  Otherwise a colorized
    ``OK``/``Fail`` line is printed for every file, followed by the error's
    class name and text when *verbose* is set and an error was supplied.

    Args:
        succeeded: Whether the file was handled without error (any truthy
            value counts as success in both output modes).
        file: Path of the file being reported.
        verbose: In the long format, also print the error details.
        error: The exception describing the failure, if any.
        short: Select the one-line, errors-only format.
    """
    if short:
        if succeeded:
            return

        # Non-syntax errors carry no position, so they are reported at 0:0.
        lineno = 0
        offset = 0
        if isinstance(error, SyntaxError):
            lineno = error.lineno or 1
            offset = error.offset or 1
            # A SyntaxError built without arguments has an empty ``args``;
            # fall back to the class name instead of raising IndexError.
            message = error.args[0] if error.args else error.__class__.__name__
        elif error is None:
            message = "unknown error"
        else:
            message = f"{error.__class__.__name__}: {error}"
        print(f"{file}:{lineno}:{offset}: {message}")
        return

    if succeeded:
        status, color = "OK", SUCCESS
    else:
        status, color = "Fail", FAIL
    print(f"{color}{file:60} {status}{ENDC}")

    # Compare against None explicitly: an exception type may define its own
    # truthiness, which must not suppress the detail line.
    if error is not None and verbose:
        print(f"  {error.__class__.__name__}: {error}")


def parse_file(source: str, file: str) -> Tuple[Any, float]:
    """Parse *source* with ``ast.parse`` and time the call.

    Args:
        source: The text to parse.
        file: Name passed to ``ast.parse`` as ``filename``.

    Returns:
        A ``(tree, seconds)`` pair: the value returned by ``ast.parse`` and
        the elapsed time of that call in seconds.

    Raises:
        Whatever ``ast.parse`` raises (for example ``SyntaxError``); nothing
        is caught here.
    """
    # perf_counter() is meant for measuring short intervals; unlike the wall
    # clock it cannot jump backwards when the system time is adjusted.
    started = time.perf_counter()
    tree = ast.parse(source, filename=file)
    elapsed = time.perf_counter() - started
    return tree, elapsed


def generate_time_stats(files, total_seconds) -> None:
    """Print a size and throughput summary for the files that were checked.

    Each path in *files* is reopened in binary mode to count its lines and
    bytes.  The totals line is always printed; the lines/sec and bytes/sec
    line is printed only when *total_seconds* is greater than zero.
    """
    file_count = len(files)
    line_count = 0
    byte_count = 0
    for path in files:
        with open(path, "rb") as stream:
            for _ in stream:
                line_count += 1
            # Iteration has consumed the stream, so the position is the size.
            byte_count += stream.tell()

    totals_head = f"Checked {file_count:,} files, {line_count:,} lines,"
    totals_tail = f"{byte_count:,} bytes in {total_seconds:,.3f} seconds."
    print(totals_head, totals_tail)

    if total_seconds > 0:
        lines_per_second = line_count / total_seconds
        bytes_per_second = byte_count / total_seconds
        print(
            f"That's {lines_per_second:,.0f} lines/sec,",
            f"or {bytes_per_second:,.0f} bytes/sec.",
        )


def parse_directory(directory: str, verbose: bool, excluded_files: List[str], short: bool) -> int:
    """Parse every ``*.py`` file under *directory* and report each outcome.

    Files are found with a recursive glob and visited in sorted order.  A
    file whose path matches any pattern in *excluded_files* (as judged by
    ``PurePath.match``) is skipped entirely.  Each remaining file is read,
    parsed through ``parse_file`` and reported through ``report_status``;
    afterwards the aggregate statistics are printed.

    Args:
        directory: Root directory to search.
        verbose: Forwarded to ``report_status``.
        excluded_files: Glob patterns for files to skip.
        short: Forwarded to ``report_status``; also enables the
            ``print_memstats()`` call at the end.

    Returns:
        ``1`` if at least one file failed, otherwise ``0``.
    """
    errors = 0
    files = []
    total_seconds = 0.0

    # escape() keeps glob metacharacters in the directory name literal.
    pattern = os.path.join(escape(directory), "**/*.py")
    for file in sorted(glob(pattern, recursive=True)):
        if any(PurePath(file).match(excluded) for excluded in excluded_files):
            continue

        try:
            # Reading belongs inside the try: tokenize.open() raises
            # SyntaxError for a bad encoding declaration and read() raises
            # UnicodeDecodeError (a ValueError) for undecodable bytes.  One
            # such file must count as a failure, not abort the whole run.
            with tokenize.open(file) as f:
                source = f.read()
            _, dt = parse_file(source, file)
        except (SyntaxError, ValueError) as error:
            report_status(succeeded=False, file=file, verbose=verbose, error=error, short=short)
            errors += 1
        else:
            # Only successful parses contribute to the timing total.
            total_seconds += dt
            report_status(succeeded=True, file=file, verbose=verbose, short=short)
        # Failed files are still included in the size statistics.
        files.append(file)

    generate_time_stats(files, total_seconds)
    if short:
        print_memstats()

    if errors:
        print(f"Encountered {errors} failures.", file=sys.stderr)
        return 1

    return 0


def main() -> None:
    """Parse the command line, check the requested directory, and exit.

    The process exit status is the value returned by ``parse_directory``.
    If no directory was given, a usage error is reported through
    ``argparser.error`` instead.
    """
    args = argparser.parse_args()
    if args.directory is None:
        # -d is declared as optional, so it can be missing here.  Without
        # this guard None reaches glob.escape() in parse_directory and the
        # script dies with a TypeError traceback instead of a usage message.
        argparser.error("the following arguments are required: -d/--directory")
    sys.exit(parse_directory(args.directory, args.verbose, args.exclude, args.short))


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()