diff options
Diffstat (limited to 'Tools/peg_generator/scripts/grammar_grapher.py')
-rwxr-xr-x | Tools/peg_generator/scripts/grammar_grapher.py | 111 |
1 files changed, 111 insertions, 0 deletions
diff --git a/Tools/peg_generator/scripts/grammar_grapher.py b/Tools/peg_generator/scripts/grammar_grapher.py new file mode 100755 index 0000000..3aa2546 --- /dev/null +++ b/Tools/peg_generator/scripts/grammar_grapher.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python3.8 + +""" Convert a grammar into a dot-file suitable for use with GraphViz + + For example: + Generate the GraphViz file: + # scripts/grammar_grapher.py data/python.gram > python.gv + + Then generate the graph... + + # twopi python.gv -Tpng > python_twopi.png + + or + + # dot python.gv -Tpng > python_dot.png + + NOTE: The _dot_ and _twopi_ tools seem to produce the most useful results. + The _circo_ tool is the worst of the bunch. Don't even bother. +""" + +import argparse +import sys + +from typing import Any, List + +sys.path.insert(0, ".") + +from pegen.build import build_parser +from pegen.grammar import ( + Alt, + Cut, + Grammar, + Group, + Leaf, + Lookahead, + Rule, + NameLeaf, + NamedItem, + Opt, + Repeat, + Rhs, +) + +argparser = argparse.ArgumentParser(prog="graph_grammar", description="Graph a grammar tree",) +argparser.add_argument("grammar_file", help="The grammar file to graph") + + +def references_for_item(item: Any) -> List[Any]: + if isinstance(item, Alt): + return [_ref for _item in item.items for _ref in references_for_item(_item)] + elif isinstance(item, Cut): + return [] + elif isinstance(item, Group): + return references_for_item(item.rhs) + elif isinstance(item, Lookahead): + return references_for_item(item.node) + elif isinstance(item, NamedItem): + return references_for_item(item.item) + + # NOTE NameLeaf must be before Leaf + elif isinstance(item, NameLeaf): + if item.value == "ENDMARKER": + return [] + return [item.value] + elif isinstance(item, Leaf): + return [] + + elif isinstance(item, Opt): + return references_for_item(item.node) + elif isinstance(item, Repeat): + return references_for_item(item.node) + elif isinstance(item, Rhs): + return [_ref for alt in item.alts for _ref in references_for_item(alt)] + elif isinstance(item, Rule): + return references_for_item(item.rhs) + else: + raise RuntimeError(f"Unknown item: {type(item)}") + + +def main() -> None: + args = argparser.parse_args() + + try: + grammar, parser, tokenizer = build_parser(args.grammar_file) + except Exception as err: + print("ERROR: Failed to parse grammar file", file=sys.stderr) + sys.exit(1) + + references = {} + for name, rule in grammar.rules.items(): + references[name] = set(references_for_item(rule)) + + # Flatten the start node if has only a single reference + root_node = "start" + if start := references["start"]: + if len(start) == 1: + root_node = list(start)[0] + del references["start"] + + print("digraph g1 {") + print('\toverlap="scale";') # Force twopi to scale the graph to avoid overlaps + print(f'\troot="{root_node}";') + print(f"\t{root_node} [color=green, shape=circle]") + for name, refs in references.items(): + if refs: # Ignore empty sets + print(f"\t{name} -> {','.join(refs)};") + print("}") + + +if __name__ == "__main__": + main() |