From 8bc401a55ce5dfcdd225c20786ba8e221a0bf29b Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 4 Mar 2019 07:26:13 +0000 Subject: Clean implementation of Parser/pgen and fix some style issues (GH-12156) --- Parser/pgen/__main__.py | 1 + Parser/pgen/grammar.py | 33 +++++++++------------------------ Parser/pgen/pgen.py | 6 ++---- Parser/pgen/token.py | 2 ++ 4 files changed, 14 insertions(+), 28 deletions(-) diff --git a/Parser/pgen/__main__.py b/Parser/pgen/__main__.py index 965b08f..eea5261 100644 --- a/Parser/pgen/__main__.py +++ b/Parser/pgen/__main__.py @@ -2,6 +2,7 @@ import argparse from .pgen import ParserGenerator + def main(): parser = argparse.ArgumentParser(description="Parser generator main program.") parser.add_argument( diff --git a/Parser/pgen/grammar.py b/Parser/pgen/grammar.py index bd405e6..05a37d6 100644 --- a/Parser/pgen/grammar.py +++ b/Parser/pgen/grammar.py @@ -1,19 +1,8 @@ import collections -class Grammar: - """Pgen parsing tables conversion class. - - Once initialized, this class supplies the grammar tables for the - parsing engine implemented by parse.py. The parsing engine - accesses the instance variables directly. The class here does not - provide initialization of the tables; several subclasses exist to - do this (see the conv and pgen modules). - The load() method reads the tables from a pickle file, which is - much faster than the other ways offered by subclasses. The pickle - file is written by calling dump() (after loading the grammar - tables using a subclass). The report() method prints a readable - representation of the tables to stdout, for debugging. +class Grammar: + """Pgen parsing tables class. The instance variables are as follows: @@ -36,8 +25,7 @@ class Grammar: dfas -- a dict mapping symbol numbers to (DFA, first) pairs, where DFA is an item from the states list above, and first is a set of tokens that can - begin this grammar rule (represented by a dict - whose values are always 1). + begin this grammar rule. labels -- a list of (x, y) pairs where x is either a token number or a symbol number, and y is either None @@ -92,14 +80,12 @@ class Grammar: "static label labels[{n_labels}] = {{\n".format(n_labels=len(self.labels)) ) for label, name in self.labels: - if name is None: - writer(" {{{label}, 0}},\n".format(label=label)) - else: - writer( - ' {{{label}, "{label_name}"}},\n'.format( - label=label, label_name=name - ) + label_name = '"{}"'.format(name) if name is not None else 0 + writer( + ' {{{label}, {label_name}}},\n'.format( + label=label, label_name=label_name ) + ) writer("};\n") def print_dfas(self, writer): @@ -114,10 +100,9 @@ class Grammar: + "0, {n_states}, states_{dfa_index},\n".format( n_states=len(dfa), dfa_index=dfaindex ) + + ' "' ) - writer(' "') - k = [name for label, name in self.labels if label in first_sets] bitset = bytearray((len(self.labels) >> 3) + 1) for token in first_sets: bitset[token >> 3] |= 1 << (token & 7) diff --git a/Parser/pgen/pgen.py b/Parser/pgen/pgen.py index c878919..d52d58f 100644 --- a/Parser/pgen/pgen.py +++ b/Parser/pgen/pgen.py @@ -3,6 +3,7 @@ import tokenize # from stdlib from . import grammar, token + class ParserGenerator(object): def __init__(self, grammar_file, token_file, stream=None, verbose=False): @@ -183,11 +184,8 @@ class ParserGenerator(object): dfa = self.make_dfa(a, z) if self.verbose: self.dump_dfa(name, dfa) - oldlen = len(dfa) self.simplify_dfa(dfa) - newlen = len(dfa) dfas[name] = dfa - #print name, oldlen, newlen if startsymbol is None: startsymbol = name return dfas, startsymbol @@ -355,7 +353,7 @@ class ParserGenerator(object): if args: try: msg = msg % args - except: + except Exception: msg = " ".join([msg] + list(map(str, args))) raise SyntaxError(msg, (self.filename, self.end[0], self.end[1], self.line)) diff --git a/Parser/pgen/token.py b/Parser/pgen/token.py index f9d45c4..008e241 100644 --- a/Parser/pgen/token.py +++ b/Parser/pgen/token.py @@ -1,5 +1,6 @@ import itertools + def generate_tokens(tokens): numbers = itertools.count(0) for line in tokens: @@ -16,6 +17,7 @@ def generate_tokens(tokens): yield ('N_TOKENS', next(numbers)) yield ('NT_OFFSET', 256) + def generate_opmap(tokens): for line in tokens: line = line.strip() -- cgit v0.12