summaryrefslogtreecommitdiffstats
path: root/Parser/pgen/automata.py
diff options
context:
space:
mode:
Diffstat (limited to 'Parser/pgen/automata.py')
-rw-r--r--Parser/pgen/automata.py371
1 files changed, 371 insertions, 0 deletions
diff --git a/Parser/pgen/automata.py b/Parser/pgen/automata.py
new file mode 100644
index 0000000..3147d86
--- /dev/null
+++ b/Parser/pgen/automata.py
@@ -0,0 +1,371 @@
+"""Classes representing state-machine concepts"""
+
+class NFA:
+ """A non deterministic finite automata
+
+ A non deterministic automata is a form of a finite state
+ machine. An NFA's rules are less restrictive than a DFA.
+ The NFA rules are:
+
+ * A transition can be non-deterministic and can result in
+ nothing, one, or two or more states.
+
+ * An epsilon transition consuming empty input is valid.
+ Transitions consuming labeled symbols are also permitted.
+
+ This class assumes that there is only one starting state and one
+ accepting (ending) state.
+
+ Attributes:
+ name (str): The name of the rule the NFA is representing.
+ start (NFAState): The starting state.
+ end (NFAState): The ending state
+ """
+
+ def __init__(self, start, end):
+ self.name = start.rule_name
+ self.start = start
+ self.end = end
+
+ def __repr__(self):
+ return "NFA(start={}, end={})".format(self.start, self.end)
+
+ def dump(self, writer=print):
+ """Dump a graphical representation of the NFA"""
+ todo = [self.start]
+ for i, state in enumerate(todo):
+ writer(" State", i, state is self.end and "(final)" or "")
+ for arc in state.arcs:
+ label = arc.label
+ next = arc.target
+ if next in todo:
+ j = todo.index(next)
+ else:
+ j = len(todo)
+ todo.append(next)
+ if label is None:
+ writer(" -> %d" % j)
+ else:
+ writer(" %s -> %d" % (label, j))
+
+
+class NFAArc:
+ """An arc representing a transition between two NFA states.
+
+ NFA states can be connected via two ways:
+
+ * A label transition: An input equal to the label must
+ be consumed to perform the transition.
+ * An epsilon transition: The transition can be taken without
+ consuming any input symbol.
+
+ Attributes:
+ target (NFAState): The ending state of the transition arc.
+ label (Optional[str]): The label that must be consumed to make
+ the transition. An epsilon transition is represented
+ using `None`.
+ """
+
+ def __init__(self, target, label):
+ self.target = target
+ self.label = label
+
+ def __repr__(self):
+ return "<%s: %s>" % (self.__class__.__name__, self.label)
+
+
+class NFAState:
+ """A state of a NFA, non deterministic finite automata.
+
+ Attributes:
+ target (rule_name): The name of the rule used to represent the NFA's
+ ending state after a transition.
+ arcs (Dict[Optional[str], NFAState]): A mapping representing transitions
+ between the current NFA state and another NFA state via following
+ a label.
+ """
+
+ def __init__(self, rule_name):
+ self.rule_name = rule_name
+ self.arcs = []
+
+ def add_arc(self, target, label=None):
+ """Add a new arc to connect the state to a target state within the NFA
+
+ The method adds a new arc to the list of arcs available as transitions
+ from the present state. An optional label indicates a named transition
+ that consumes an input while the absence of a label represents an epsilon
+ transition.
+
+ Attributes:
+ target (NFAState): The end of the transition that the arc represents.
+ label (Optional[str]): The label that must be consumed for making
+ the transition. If the label is not provided the transition is assumed
+ to be an epsilon-transition.
+ """
+ assert label is None or isinstance(label, str)
+ assert isinstance(target, NFAState)
+ self.arcs.append(NFAArc(target, label))
+
+ def __repr__(self):
+ return "<%s: from %s>" % (self.__class__.__name__, self.rule_name)
+
+
+class DFA:
+ """A deterministic finite automata
+
+ A deterministic finite automata is a form of a finite state machine
+ that obeys the following rules:
+
+ * Each of the transitions is uniquely determined by
+ the source state and input symbol
+ * Reading an input symbol is required for each state
+ transition (no epsilon transitions).
+
+ The finite-state machine will accept or reject a string of symbols
+ and only produces a unique computation of the automaton for each input
+ string. The DFA must have a unique starting state (represented as the first
+ element in the list of states) but can have multiple final states.
+
+ Attributes:
+ name (str): The name of the rule the DFA is representing.
+ states (List[DFAState]): A collection of DFA states.
+ """
+
+ def __init__(self, name, states):
+ self.name = name
+ self.states = states
+
+ @classmethod
+ def from_nfa(cls, nfa):
+ """Constructs a DFA from a NFA using the Rabin–Scott construction algorithm.
+
+ To simulate the operation of a DFA on a given input string, it's
+ necessary to keep track of a single state at any time, or more precisely,
+ the state that the automaton will reach after seeing a prefix of the
+ input. In contrast, to simulate an NFA, it's necessary to keep track of
+ a set of states: all of the states that the automaton could reach after
+ seeing the same prefix of the input, according to the nondeterministic
+ choices made by the automaton. There are two possible sources of
+ non-determinism:
+
+ 1) Multiple (one or more) transitions with the same label
+
+ 'A' +-------+
+ +----------->+ State +----------->+
+ | | 2 |
+ +-------+ +-------+
+ | State |
+ | 1 | +-------+
+ +-------+ | State |
+ +----------->+ 3 +----------->+
+ 'A' +-------+
+
+ 2) Epsilon transitions (transitions that can be taken without consuming any input)
+
+ +-------+ +-------+
+ | State | ε | State |
+ | 1 +----------->+ 2 +----------->+
+ +-------+ +-------+
+
+ Looking at the first case above, we can't determine which transition should be
+ followed when given an input A. We could choose whether or not to follow the
+ transition while in the second case the problem is that we can choose both to
+ follow the transition or not doing it. To solve this problem we can imagine that
+ we follow all possibilities at the same time and we construct new states from the
+ set of all possible reachable states. For every case in the previous example:
+
+
+ 1) For multiple transitions with the same label we colapse all of the
+ final states under the same one
+
+ +-------+ +-------+
+ | State | 'A' | State |
+ | 1 +----------->+ 2-3 +----------->+
+ +-------+ +-------+
+
+ 2) For epsilon transitions we collapse all epsilon-reachable states
+ into the same one
+
+ +-------+
+ | State |
+ | 1-2 +----------->
+ +-------+
+
+ Because the DFA states consist of sets of NFA states, an n-state NFA
+ may be converted to a DFA with at most 2**n states. Notice that the
+ constructed DFA is not minimal and can be simplified or reduced
+ afterwards.
+
+ Parameters:
+ name (NFA): The NFA to transform to DFA.
+ """
+ assert isinstance(nfa, NFA)
+
+ def add_closure(nfa_state, base_nfa_set):
+ """Calculate the epsilon-closure of a given state
+
+ Add to the *base_nfa_set* all the states that are
+ reachable from *nfa_state* via epsilon-transitions.
+ """
+ assert isinstance(nfa_state, NFAState)
+ if nfa_state in base_nfa_set:
+ return
+ base_nfa_set.add(nfa_state)
+ for nfa_arc in nfa_state.arcs:
+ if nfa_arc.label is None:
+ add_closure(nfa_arc.target, base_nfa_set)
+
+ # Calculte the epsilon-closure of the starting state
+ base_nfa_set = set()
+ add_closure(nfa.start, base_nfa_set)
+
+ # Start by visiting the NFA starting state (there is only one).
+ states = [DFAState(nfa.name, base_nfa_set, nfa.end)]
+
+ for state in states: # NB states grow while we're iterating
+
+ # Find transitions from the current state to other reachable states
+ # and store them in mapping that correlates the label to all the
+ # possible reachable states that can be obtained by consuming a
+ # token equal to the label. Each set of all the states that can
+ # be reached after following a label will be the a DFA state.
+ arcs = {}
+ for nfa_state in state.nfa_set:
+ for nfa_arc in nfa_state.arcs:
+ if nfa_arc.label is not None:
+ nfa_set = arcs.setdefault(nfa_arc.label, set())
+ # All states that can be reached by epsilon-transitions
+ # are also included in the set of reachable states.
+ add_closure(nfa_arc.target, nfa_set)
+
+ # Now create new DFAs by visiting all posible transitions between
+ # the current DFA state and the new power-set states (each nfa_set)
+ # via the different labels. As the nodes are appended to *states* this
+ # is performing a deep-first search traversal over the power-set of
+ # the states of the original NFA.
+ for label, nfa_set in sorted(arcs.items()):
+ for exisisting_state in states:
+ if exisisting_state.nfa_set == nfa_set:
+ # The DFA state already exists for this rule.
+ next_state = exisisting_state
+ break
+ else:
+ next_state = DFAState(nfa.name, nfa_set, nfa.end)
+ states.append(next_state)
+
+ # Add a transition between the current DFA state and the new
+ # DFA state (the power-set state) via the current label.
+ state.add_arc(next_state, label)
+
+ return cls(nfa.name, states)
+
+ def __iter__(self):
+ return iter(self.states)
+
+ def simplify(self):
+ """Attempt to reduce the number of states of the DFA
+
+ Transform the DFA into an equivalent DFA that has fewer states. Two
+ classes of states can be removed or merged from the original DFA without
+ affecting the language it accepts to minimize it:
+
+ * Unreachable states can not be reached from the initial
+ state of the DFA, for any input string.
+ * Nondistinguishable states are those that cannot be distinguished
+ from one another for any input string.
+
+ This algorithm does not achieve the optimal fully-reduced solution, but it
+ works well enough for the particularities of the Python grammar. The
+ algorithm repeatedly looks for two states that have the same set of
+ arcs (same labels pointing to the same nodes) and unifies them, until
+ things stop changing.
+ """
+ changes = True
+ while changes:
+ changes = False
+ for i, state_i in enumerate(self.states):
+ for j in range(i + 1, len(self.states)):
+ state_j = self.states[j]
+ if state_i == state_j:
+ del self.states[j]
+ for state in self.states:
+ state.unifystate(state_j, state_i)
+ changes = True
+ break
+
+ def dump(self, writer=print):
+ """Dump a graphical representation of the DFA"""
+ for i, state in enumerate(self.states):
+ writer(" State", i, state.is_final and "(final)" or "")
+ for label, next in sorted(state.arcs.items()):
+ writer(" %s -> %d" % (label, self.states.index(next)))
+
+
+class DFAState(object):
+ """A state of a DFA
+
+ Attributes:
+ rule_name (rule_name): The name of the DFA rule containing the represented state.
+ nfa_set (Set[NFAState]): The set of NFA states used to create this state.
+ final (bool): True if the state represents an accepting state of the DFA
+ containing this state.
+ arcs (Dict[label, DFAState]): A mapping representing transitions between
+ the current DFA state and another DFA state via following a label.
+ """
+
+ def __init__(self, rule_name, nfa_set, final):
+ assert isinstance(nfa_set, set)
+ assert isinstance(next(iter(nfa_set)), NFAState)
+ assert isinstance(final, NFAState)
+ self.rule_name = rule_name
+ self.nfa_set = nfa_set
+ self.arcs = {} # map from terminals/nonterminals to DFAState
+ self.is_final = final in nfa_set
+
+ def add_arc(self, target, label):
+ """Add a new arc to the current state.
+
+ Parameters:
+ target (DFAState): The DFA state at the end of the arc.
+ label (str): The label respresenting the token that must be consumed
+ to perform this transition.
+ """
+ assert isinstance(label, str)
+ assert label not in self.arcs
+ assert isinstance(target, DFAState)
+ self.arcs[label] = target
+
+ def unifystate(self, old, new):
+ """Replace all arcs from the current node to *old* with *new*.
+
+ Parameters:
+ old (DFAState): The DFA state to remove from all existing arcs.
+ new (DFAState): The DFA state to replace in all existing arcs.
+ """
+ for label, next_ in self.arcs.items():
+ if next_ is old:
+ self.arcs[label] = new
+
+ def __eq__(self, other):
+ # The nfa_set does not matter for equality
+ assert isinstance(other, DFAState)
+ if self.is_final != other.is_final:
+ return False
+ # We cannot just return self.arcs == other.arcs because that
+ # would invoke this method recursively if there are any cycles.
+ if len(self.arcs) != len(other.arcs):
+ return False
+ for label, next_ in self.arcs.items():
+ if next_ is not other.arcs.get(label):
+ return False
+ return True
+
+ __hash__ = None # For Py3 compatibility.
+
+ def __repr__(self):
+ return "<%s: %s is_final=%s>" % (
+ self.__class__.__name__,
+ self.rule_name,
+ self.is_final,
+ )