summaryrefslogtreecommitdiffstats
path: root/Tools/peg_generator/scripts/grammar_grapher.py
diff options
context:
space:
mode:
Diffstat (limited to 'Tools/peg_generator/scripts/grammar_grapher.py')
-rwxr-xr-xTools/peg_generator/scripts/grammar_grapher.py111
1 files changed, 111 insertions, 0 deletions
diff --git a/Tools/peg_generator/scripts/grammar_grapher.py b/Tools/peg_generator/scripts/grammar_grapher.py
new file mode 100755
index 0000000..3aa2546
--- /dev/null
+++ b/Tools/peg_generator/scripts/grammar_grapher.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python3.8
+
+""" Convert a grammar into a dot-file suitable for use with GraphViz
+
+ For example:
+ Generate the GraphViz file:
+ # scripts/grammar_grapher.py data/python.gram > python.gv
+
+ Then generate the graph...
+
+ # twopi python.gv -Tpng > python_twopi.png
+
+ or
+
+ # dot python.gv -Tpng > python_dot.png
+
+ NOTE: The _dot_ and _twopi_ tools seem to produce the most useful results.
+ The _circo_ tool is the worst of the bunch. Don't even bother.
+"""
+
+import argparse
+import sys
+
+from typing import Any, List
+
+sys.path.insert(0, ".")
+
+from pegen.build import build_parser
+from pegen.grammar import (
+ Alt,
+ Cut,
+ Grammar,
+ Group,
+ Leaf,
+ Lookahead,
+ Rule,
+ NameLeaf,
+ NamedItem,
+ Opt,
+ Repeat,
+ Rhs,
+)
+
+argparser = argparse.ArgumentParser(prog="graph_grammar", description="Graph a grammar tree",)
+argparser.add_argument("grammar_file", help="The grammar file to graph")
+
+
+def references_for_item(item: Any) -> List[Any]:
+ if isinstance(item, Alt):
+ return [_ref for _item in item.items for _ref in references_for_item(_item)]
+ elif isinstance(item, Cut):
+ return []
+ elif isinstance(item, Group):
+ return references_for_item(item.rhs)
+ elif isinstance(item, Lookahead):
+ return references_for_item(item.node)
+ elif isinstance(item, NamedItem):
+ return references_for_item(item.item)
+
+ # NOTE NameLeaf must be before Leaf
+ elif isinstance(item, NameLeaf):
+ if item.value == "ENDMARKER":
+ return []
+ return [item.value]
+ elif isinstance(item, Leaf):
+ return []
+
+ elif isinstance(item, Opt):
+ return references_for_item(item.node)
+ elif isinstance(item, Repeat):
+ return references_for_item(item.node)
+ elif isinstance(item, Rhs):
+ return [_ref for alt in item.alts for _ref in references_for_item(alt)]
+ elif isinstance(item, Rule):
+ return references_for_item(item.rhs)
+ else:
+ raise RuntimeError(f"Unknown item: {type(item)}")
+
+
+def main() -> None:
+ args = argparser.parse_args()
+
+ try:
+ grammar, parser, tokenizer = build_parser(args.grammar_file)
+ except Exception as err:
+ print("ERROR: Failed to parse grammar file", file=sys.stderr)
+ sys.exit(1)
+
+ references = {}
+ for name, rule in grammar.rules.items():
+ references[name] = set(references_for_item(rule))
+
+ # Flatten the start node if has only a single reference
+ root_node = "start"
+ if start := references["start"]:
+ if len(start) == 1:
+ root_node = list(start)[0]
+ del references["start"]
+
+ print("digraph g1 {")
+ print('\toverlap="scale";') # Force twopi to scale the graph to avoid overlaps
+ print(f'\troot="{root_node}";')
+ print(f"\t{root_node} [color=green, shape=circle]")
+ for name, refs in references.items():
+ if refs: # Ignore empty sets
+ print(f"\t{name} -> {','.join(refs)};")
+ print("}")
+
+
+if __name__ == "__main__":
+ main()