Diffstat (limited to 'Parser/pgen/metaparser.py')
-rw-r--r--  Parser/pgen/metaparser.py  152
1 file changed, 0 insertions, 152 deletions
diff --git a/Parser/pgen/metaparser.py b/Parser/pgen/metaparser.py
deleted file mode 100644
index 074a083..0000000
--- a/Parser/pgen/metaparser.py
+++ /dev/null
@@ -1,152 +0,0 @@
-"""Parser for the Python metagrammar"""
-
-import io
-import tokenize # from stdlib
-
-from .automata import NFA, NFAState
-
-
-class GrammarParser:
- """Parser for Python grammar files."""
-
- _translation_table = {
- tokenize.NAME: "NAME",
- tokenize.STRING: "STRING",
- tokenize.NEWLINE: "NEWLINE",
- tokenize.NL: "NL",
- tokenize.OP: "OP",
- tokenize.ENDMARKER: "ENDMARKER",
- tokenize.COMMENT: "COMMENT",
- }
-
- def __init__(self, grammar):
- self.grammar = grammar
- grammar_adaptor = io.StringIO(grammar)
- self.generator = tokenize.generate_tokens(grammar_adaptor.readline)
- self._gettoken() # Initialize lookahead
- self._current_rule_name = None
-
-    def parse(self):
-        """Turn the grammar into a collection of NFAs"""
-        # grammar: (NEWLINE | rule)* ENDMARKER
-        while self.type != tokenize.ENDMARKER:
-            while self.type == tokenize.NEWLINE:
-                self._gettoken()
-            # rule: NAME ':' rhs NEWLINE
-            self._current_rule_name = self._expect(tokenize.NAME)
-            self._expect(tokenize.OP, ":")
-            a, z = self._parse_rhs()
-            self._expect(tokenize.NEWLINE)
-
-            yield NFA(a, z)
-
-    def _parse_rhs(self):
-        # rhs: items ('|' items)*
-        a, z = self._parse_items()
-        if self.value != "|":
-            return a, z
-        else:
-            aa = NFAState(self._current_rule_name)
-            zz = NFAState(self._current_rule_name)
-            while True:
-                # Add epsilon arcs from the shared start state to the start of
-                # this alternative and from its end to the shared end state, so
-                # that any one of the alternatives can be taken.
-                aa.add_arc(a)
-                z.add_arc(zz)
-                if self.value != "|":
-                    break
-
-                self._gettoken()
-                a, z = self._parse_items()
-            return aa, zz
-
-    def _parse_items(self):
-        # items: item+
-        a, b = self._parse_item()
-        while self.type in (tokenize.NAME, tokenize.STRING) or self.value in ("(", "["):
-            c, d = self._parse_item()
-            # Add an epsilon arc from the end of the previous item to the
-            # beginning of the new one, chaining all the items together.
-            # This way the end can only be reached by visiting every one of
-            # the items.
-            b.add_arc(c)
-            b = d
-        return a, b
-
-    def _parse_item(self):
-        # item: '[' rhs ']' | atom ['+' | '*']
-        if self.value == "[":
-            self._gettoken()
-            a, z = self._parse_rhs()
-            self._expect(tokenize.OP, "]")
-            # Make a transition from the beginning to the end so it is possible
-            # to advance for free to the next state of this item without
-            # consuming anything from the rhs.
-            a.add_arc(z)
-            return a, z
-        else:
-            a, z = self._parse_atom()
-            value = self.value
-            if value not in ("+", "*"):
-                return a, z
-            self._gettoken()
-            z.add_arc(a)
-            if value == "+":
-                # The arc from the end back to the beginning lets the item
-                # repeat; keeping a separate end state forces at least one pass.
-                return a, z
-            else:
-                # The end state is the same as the beginning, so the item can
-                # be skipped entirely or repeated any number of times.
-                return a, a
-
-    def _parse_atom(self):
-        # atom: '(' rhs ')' | NAME | STRING
-        if self.value == "(":
-            self._gettoken()
-            a, z = self._parse_rhs()
-            self._expect(tokenize.OP, ")")
-            return a, z
-        elif self.type in (tokenize.NAME, tokenize.STRING):
-            a = NFAState(self._current_rule_name)
-            z = NFAState(self._current_rule_name)
-            # The next state can only be reached by consuming this value.
-            a.add_arc(z, self.value)
-            self._gettoken()
-            return a, z
-        else:
-            self._raise_error(
-                "expected (...) or NAME or STRING, got {} ({})",
-                self._translation_table.get(self.type, self.type),
-                self.value,
-            )
-
-    def _expect(self, type_, value=None):
-        if self.type != type_:
-            self._raise_error(
-                "expected {}, got {} ({})",
-                self._translation_table.get(type_, type_),
-                self._translation_table.get(self.type, self.type),
-                self.value,
-            )
-        if value is not None and self.value != value:
-            self._raise_error("expected {}, got {}", value, self.value)
-        value = self.value
-        self._gettoken()
-        return value
-
-    def _gettoken(self):
-        tup = next(self.generator)
-        while tup[0] in (tokenize.COMMENT, tokenize.NL):
-            tup = next(self.generator)
-        self.type, self.value, self.begin, self.end, self.line = tup
-
-    def _raise_error(self, msg, *args):
-        if args:
-            try:
-                msg = msg.format(*args)
-            except Exception:
-                msg = " ".join([msg] + list(map(str, args)))
-        line = self.grammar.splitlines()[self.begin[0] - 1]
-        raise SyntaxError(msg, ("<grammar>", self.begin[0], self.begin[1], line))
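
For context on the input side of the deleted parser: _gettoken() keeps a one-token lookahead over the stdlib tokenizer and silently drops COMMENT and NL tokens. The standalone sketch below (nothing from this diff is needed to run it; the sample grammar line is made up for illustration) prints the token stream that parse() would actually see for a one-rule grammar.

import io
import tokenize

grammar = "expr: NAME ('+' NAME)*  # an illustrative rule, not from this diff\n"

tokens = tokenize.generate_tokens(io.StringIO(grammar).readline)
for tok in tokens:
    # _gettoken() filters these out before the parser ever sees them
    if tok.type in (tokenize.COMMENT, tokenize.NL):
        continue
    print(tokenize.tok_name[tok.type], repr(tok.string))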
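The arc wiring described in the _parse_rhs/_parse_items/_parse_item comments can also be shown concretely. The sketch below is self-contained: NFAState and NFA are minimal stand-ins for the real classes in the neighbouring automata module (not part of this diff, so their exact API is an assumption beyond what the metaparser uses), and matches() is an illustrative helper, not pgen code. It hand-builds the fragment the parser would produce for a rule like expr: NAME ('+' NAME)* and checks a few token sequences against it.

class NFAState:
    """Minimal stand-in for pgen's NFAState (assumed API, kept to what the metaparser uses)."""

    def __init__(self, rule_name):
        self.rule_name = rule_name
        self.arcs = []  # list of (label or None, target); None means an epsilon arc

    def add_arc(self, target, label=None):
        self.arcs.append((label, target))


class NFA:
    """Minimal stand-in for pgen's NFA: a named (start, end) pair."""

    def __init__(self, start, end):
        self.name = start.rule_name
        self.start = start
        self.end = end


def matches(nfa, tokens):
    """Illustrative helper: does the NFA accept this token sequence?"""

    def closure(states):
        todo, seen = list(states), set(states)
        while todo:
            for label, target in todo.pop().arcs:
                if label is None and target not in seen:
                    seen.add(target)
                    todo.append(target)
        return seen

    current = closure({nfa.start})
    for tok in tokens:
        moved = {t for s in current for (label, t) in s.arcs if label == tok}
        current = closure(moved)
    return nfa.end in current


# First item: the NAME atom is a two-state fragment consuming "NAME".
a, z = NFAState("expr"), NFAState("expr")
a.add_arc(z, "NAME")

# Parenthesised group "'+' NAME": two atoms chained with an epsilon arc,
# exactly as _parse_items does with b.add_arc(c).
plus_a, plus_z = NFAState("expr"), NFAState("expr")
plus_a.add_arc(plus_z, "'+'")
name_a, name_z = NFAState("expr"), NFAState("expr")
name_a.add_arc(name_z, "NAME")
plus_z.add_arc(name_a)

# '*' suffix, as in _parse_item: cycle the group's end back to its start and
# reuse the start as the end, so zero repetitions are accepted.
name_z.add_arc(plus_a)
group_start = group_end = plus_a

# Chain the first item to the starred group and wrap the rule in an NFA.
z.add_arc(group_start)
expr = NFA(a, group_end)

assert matches(expr, ["NAME"])
assert matches(expr, ["NAME", "'+'", "NAME", "'+'", "NAME"])
assert not matches(expr, ["NAME", "'+'"])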