summaryrefslogtreecommitdiffstats
path: root/Tools/peg_generator/scripts/grammar_grapher.py
blob: 4a41dfaa3da0ff8de526c5eff70921f492c3e7a2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/env python3.8

""" Convert a grammar into a dot-file suitable for use with GraphViz

    For example:
        Generate the GraphViz file:
        # scripts/grammar_grapher.py data/python.gram > python.gv

        Then generate the graph...

        # twopi python.gv -Tpng > python_twopi.png

        or

        # dot python.gv -Tpng > python_dot.png

        NOTE: The _dot_ and _twopi_ tools seem to produce the most useful results.
              The _circo_ tool is the worst of the bunch. Don't even bother.
"""

import argparse
import sys

from typing import Any, List

sys.path.insert(0, ".")

from pegen.build import build_parser
from pegen.grammar import (
    Alt,
    Cut,
    Forced,
    Grammar,
    Group,
    Leaf,
    Lookahead,
    Rule,
    NameLeaf,
    NamedItem,
    Opt,
    Repeat,
    Rhs,
)

argparser = argparse.ArgumentParser(
    prog="graph_grammar",
    description="Graph a grammar tree",
)
argparser.add_argument(
    "-s",
    "--start",
    choices=["exec", "eval", "single"],
    default="exec",
    help="Choose the grammar's start rule (exec, eval or single)",
)
argparser.add_argument("grammar_file", help="The grammar file to graph")


def references_for_item(item: Any) -> List[Any]:
    if isinstance(item, Alt):
        return [_ref for _item in item.items for _ref in references_for_item(_item)]
    elif isinstance(item, Cut):
        return []
    elif isinstance(item, Forced):
        return references_for_item(item.node)
    elif isinstance(item, Group):
        return references_for_item(item.rhs)
    elif isinstance(item, Lookahead):
        return references_for_item(item.node)
    elif isinstance(item, NamedItem):
        return references_for_item(item.item)

    # NOTE NameLeaf must be before Leaf
    elif isinstance(item, NameLeaf):
        if item.value == "ENDMARKER":
            return []
        return [item.value]
    elif isinstance(item, Leaf):
        return []

    elif isinstance(item, Opt):
        return references_for_item(item.node)
    elif isinstance(item, Repeat):
        return references_for_item(item.node)
    elif isinstance(item, Rhs):
        return [_ref for alt in item.alts for _ref in references_for_item(alt)]
    elif isinstance(item, Rule):
        return references_for_item(item.rhs)
    else:
        raise RuntimeError(f"Unknown item: {type(item)}")


def main() -> None:
    args = argparser.parse_args()

    try:
        grammar, parser, tokenizer = build_parser(args.grammar_file)
    except Exception as err:
        print("ERROR: Failed to parse grammar file", file=sys.stderr)
        sys.exit(1)

    references = {}
    for name, rule in grammar.rules.items():
        references[name] = set(references_for_item(rule))

    # Flatten the start node if has only a single reference
    root_node = {"exec": "file", "eval": "eval", "single": "interactive"}[args.start]

    print("digraph g1 {")
    print('\toverlap="scale";')  # Force twopi to scale the graph to avoid overlaps
    print(f'\troot="{root_node}";')
    print(f"\t{root_node} [color=green, shape=circle];")
    for name, refs in references.items():
        for ref in refs:
            print(f"\t{name} -> {ref};")
    print("}")


if __name__ == "__main__":
    main()