diff options
author | Jeremy Hylton <jeremy@alum.mit.edu> | 2005-10-20 19:59:25 (GMT) |
---|---|---|
committer | Jeremy Hylton <jeremy@alum.mit.edu> | 2005-10-20 19:59:25 (GMT) |
commit | 3e0055f8c65c407e74ce476b8e2b1fb889723514 (patch) | |
tree | 169cce8c87033e15364b57de947073e6e9c34d59 /Parser | |
parent | 2cb94aba122b86dcda87d437eb36a860d14393d5 (diff) | |
download | cpython-3e0055f8c65c407e74ce476b8e2b1fb889723514.zip cpython-3e0055f8c65c407e74ce476b8e2b1fb889723514.tar.gz cpython-3e0055f8c65c407e74ce476b8e2b1fb889723514.tar.bz2 |
Merge ast-branch to head
This change implements a new bytecode compiler, based on a
transformation of the parse tree to an abstract syntax defined in
Parser/Python.asdl.
The compiler implementation is not complete, but it is in stable
enough shape to run the entire test suite excepting two disabled
tests.
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/.cvsignore | 3 | ||||
-rw-r--r-- | Parser/Python.asdl | 107 | ||||
-rw-r--r-- | Parser/asdl.py | 393 | ||||
-rwxr-xr-x | Parser/asdl_c.py | 621 | ||||
-rw-r--r-- | Parser/grammar.mak | 2 | ||||
-rw-r--r-- | Parser/parsetok.c | 5 | ||||
-rw-r--r-- | Parser/spark.py | 840 |
7 files changed, 1967 insertions, 4 deletions
diff --git a/Parser/.cvsignore b/Parser/.cvsignore index a7dd8e6..f300915 100644 --- a/Parser/.cvsignore +++ b/Parser/.cvsignore @@ -1,3 +1,6 @@ Makefile pgen add2lib +asdl.pyc +asdl_c.pyc +spark.pyc diff --git a/Parser/Python.asdl b/Parser/Python.asdl new file mode 100644 index 0000000..0883d91 --- /dev/null +++ b/Parser/Python.asdl @@ -0,0 +1,107 @@ +-- ASDL's three builtin types are identifier, int, string + +module Python +{ + mod = Module(stmt* body) + | Interactive(stmt* body) + | Expression(expr body) + + -- not really an actual node but useful in Jython's typesystem. + | Suite(stmt* body) + + stmt = FunctionDef(identifier name, arguments args, + stmt* body, expr* decorators) + | ClassDef(identifier name, expr* bases, stmt* body) + | Return(expr? value) + + | Delete(expr* targets) + | Assign(expr* targets, expr value) + | AugAssign(expr target, operator op, expr value) + + -- not sure if bool is allowed, can always use int + | Print(expr? dest, expr* values, bool nl) + + -- use 'orelse' because else is a keyword in target languages + | For(expr target, expr iter, stmt* body, stmt* orelse) + | While(expr test, stmt* body, stmt* orelse) + | If(expr test, stmt* body, stmt* orelse) + + -- 'type' is a bad name + | Raise(expr? type, expr? inst, expr? tback) + | TryExcept(stmt* body, excepthandler* handlers, stmt* orelse) + | TryFinally(stmt* body, stmt* finalbody) + | Assert(expr test, expr? msg) + + | Import(alias* names) + | ImportFrom(identifier module, alias* names) + + -- Doesn't capture requirement that locals must be + -- defined if globals is + -- still supports use as a function! + | Exec(expr body, expr? globals, expr? locals) + + | Global(identifier* names) + | Expr(expr value) + | Pass | Break | Continue + + -- XXX Jython will be different + attributes (int lineno) + + -- BoolOp() can use left & right? + expr = BoolOp(boolop op, expr* values) + | BinOp(expr left, operator op, expr right) + | UnaryOp(unaryop op, expr operand) + | Lambda(arguments args, expr body) + | Dict(expr* keys, expr* values) + | ListComp(expr elt, comprehension* generators) + | GeneratorExp(expr elt, comprehension* generators) + | Yield(expr? value) + -- need sequences for compare to distinguish between + -- x < 4 < 3 and (x < 4) < 3 + | Compare(expr left, cmpop* ops, expr* comparators) + | Call(expr func, expr* args, keyword* keywords, + expr? starargs, expr? kwargs) + | Repr(expr value) + | Num(object n) -- a number as a PyObject. + | Str(string s) -- need to specify raw, unicode, etc? + -- other literals? bools? + + -- the following expression can appear in assignment context + | Attribute(expr value, identifier attr, expr_context ctx) + | Subscript(expr value, slice slice, expr_context ctx) + | Name(identifier id, expr_context ctx) + | List(expr* elts, expr_context ctx) + | Tuple(expr *elts, expr_context ctx) + + attributes (int lineno) + + expr_context = Load | Store | Del | AugLoad | AugStore | Param + + slice = Ellipsis | Slice(expr? lower, expr? upper, expr? step) + | ExtSlice(slice* dims) + | Index(expr value) + + boolop = And | Or + + operator = Add | Sub | Mult | Div | Mod | Pow | LShift + | RShift | BitOr | BitXor | BitAnd | FloorDiv + + unaryop = Invert | Not | UAdd | USub + + cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn + + comprehension = (expr target, expr iter, expr* ifs) + + -- not sure what to call the first argument for raise and except + + excepthandler = (expr? type, expr? name, stmt* body) + + arguments = (expr* args, identifier? vararg, + identifier? kwarg, expr* defaults) + + -- keyword arguments supplied to call + keyword = (identifier arg, expr value) + + -- import name with optional 'as' alias. + alias = (identifier name, identifier? asname) +} diff --git a/Parser/asdl.py b/Parser/asdl.py new file mode 100644 index 0000000..0db4e3b --- /dev/null +++ b/Parser/asdl.py @@ -0,0 +1,393 @@ +"""An implementation of the Zephyr Abstract Syntax Definition Language. + +See http://asdl.sourceforge.net/ and +http://www.cs.princeton.edu/~danwang/Papers/dsl97/dsl97-abstract.html. + +Only supports top level module decl, not view. I'm guessing that view +is intended to support the browser and I'm not interested in the +browser. +""" + +#__metaclass__ = type + +import os +import traceback + +import spark + +class Token: + # spark seems to dispatch in the parser based on a token's + # type attribute + def __init__(self, type, lineno): + self.type = type + self.lineno = lineno + + def __str__(self): + return self.type + + def __repr__(self): + return str(self) + +class Id(Token): + def __init__(self, value, lineno): + self.type = 'Id' + self.value = value + self.lineno = lineno + + def __str__(self): + return self.value + +class ASDLSyntaxError: + + def __init__(self, lineno, token=None, msg=None): + self.lineno = lineno + self.token = token + self.msg = msg + + def __str__(self): + if self.msg is None: + return "Error at '%s', line %d" % (self.token, self.lineno) + else: + return "%s, line %d" % (self.msg, self.lineno) + +class ASDLScanner(spark.GenericScanner, object): + + def tokenize(self, input): + self.rv = [] + self.lineno = 1 + super(ASDLScanner, self).tokenize(input) + return self.rv + + def t_id(self, s): + r"[\w\.]+" + # XXX doesn't distinguish upper vs. lower, which is + # significant for ASDL. + self.rv.append(Id(s, self.lineno)) + + def t_xxx(self, s): # not sure what this production means + r"<=" + self.rv.append(Token(s, self.lineno)) + + def t_punctuation(self, s): + r"[\{\}\*\=\|\(\)\,\?\:]" + self.rv.append(Token(s, self.lineno)) + + def t_comment(self, s): + r"\-\-[^\n]*" + pass + + def t_newline(self, s): + r"\n" + self.lineno += 1 + + def t_whitespace(self, s): + r"[ \t]+" + pass + + def t_default(self, s): + r" . +" + raise ValueError, "unmatched input: %s" % `s` + +class ASDLParser(spark.GenericParser, object): + def __init__(self): + super(ASDLParser, self).__init__("module") + + def typestring(self, tok): + return tok.type + + def error(self, tok): + raise ASDLSyntaxError(tok.lineno, tok) + + def p_module_0(self, (module, name, _0, _1)): + " module ::= Id Id { } " + if module.value != "module": + raise ASDLSyntaxError(module.lineno, + msg="expected 'module', found %s" % module) + return Module(name, None) + + def p_module(self, (module, name, _0, definitions, _1)): + " module ::= Id Id { definitions } " + if module.value != "module": + raise ASDLSyntaxError(module.lineno, + msg="expected 'module', found %s" % module) + return Module(name, definitions) + + def p_definition_0(self, (definition,)): + " definitions ::= definition " + return definition + + def p_definition_1(self, (definitions, definition)): + " definitions ::= definition definitions " + return definitions + definition + + def p_definition(self, (id, _, type)): + " definition ::= Id = type " + return [Type(id, type)] + + def p_type_0(self, (product,)): + " type ::= product " + return product + + def p_type_1(self, (sum,)): + " type ::= sum " + return Sum(sum) + + def p_type_2(self, (sum, id, _0, attributes, _1)): + " type ::= sum Id ( fields ) " + if id.value != "attributes": + raise ASDLSyntaxError(id.lineno, + msg="expected attributes, found %s" % id) + return Sum(sum, attributes) + + def p_product(self, (_0, fields, _1)): + " product ::= ( fields ) " + # XXX can't I just construct things in the right order? + fields.reverse() + return Product(fields) + + def p_sum_0(self, (constructor,)): + " sum ::= constructor """ + return [constructor] + + def p_sum_1(self, (constructor, _, sum)): + " sum ::= constructor | sum " + return [constructor] + sum + + def p_sum_2(self, (constructor, _, sum)): + " sum ::= constructor | sum " + return [constructor] + sum + + def p_constructor_0(self, (id,)): + " constructor ::= Id " + return Constructor(id) + + def p_constructor_1(self, (id, _0, fields, _1)): + " constructor ::= Id ( fields ) " + # XXX can't I just construct things in the right order? + fields.reverse() + return Constructor(id, fields) + + def p_fields_0(self, (field,)): + " fields ::= field " + return [field] + + def p_fields_1(self, (field, _, fields)): + " fields ::= field , fields " + return fields + [field] + + def p_field_0(self, (type,)): + " field ::= Id " + return Field(type) + + def p_field_1(self, (type, name)): + " field ::= Id Id " + return Field(type, name) + + def p_field_2(self, (type, _, name)): + " field ::= Id * Id " + return Field(type, name, seq=1) + + def p_field_3(self, (type, _, name)): + " field ::= Id ? Id " + return Field(type, name, opt=1) + + def p_field_4(self, (type, _)): + " field ::= Id * " + return Field(type, seq=1) + + def p_field_5(self, (type, _)): + " field ::= Id ? " + return Field(type, opt=1) + +builtin_types = ("identifier", "string", "int", "bool", "object") + +# below is a collection of classes to capture the AST of an AST :-) +# not sure if any of the methods are useful yet, but I'm adding them +# piecemeal as they seem helpful + +class AST: + pass # a marker class + +class Module(AST): + def __init__(self, name, dfns): + self.name = name + self.dfns = dfns + self.types = {} # maps type name to value (from dfns) + for type in dfns: + self.types[type.name.value] = type.value + + def __repr__(self): + return "Module(%s, %s)" % (self.name, self.dfns) + +class Type(AST): + def __init__(self, name, value): + self.name = name + self.value = value + + def __repr__(self): + return "Type(%s, %s)" % (self.name, self.value) + +class Constructor(AST): + def __init__(self, name, fields=None): + self.name = name + self.fields = fields or [] + + def __repr__(self): + return "Constructor(%s, %s)" % (self.name, self.fields) + +class Field(AST): + def __init__(self, type, name=None, seq=0, opt=0): + self.type = type + self.name = name + self.seq = seq + self.opt = opt + + def __repr__(self): + if self.seq: + extra = ", seq=1" + elif self.opt: + extra = ", opt=1" + else: + extra = "" + if self.name is None: + return "Field(%s%s)" % (self.type, extra) + else: + return "Field(%s, %s%s)" % (self.type, self.name, extra) + +class Sum(AST): + def __init__(self, types, attributes=None): + self.types = types + self.attributes = attributes or [] + + def __repr__(self): + if self.attributes is None: + return "Sum(%s)" % self.types + else: + return "Sum(%s, %s)" % (self.types, self.attributes) + +class Product(AST): + def __init__(self, fields): + self.fields = fields + + def __repr__(self): + return "Product(%s)" % self.fields + +class VisitorBase(object): + + def __init__(self, skip=0): + self.cache = {} + self.skip = skip + + def visit(self, object, *args): + meth = self._dispatch(object) + if meth is None: + return + try: + meth(object, *args) + except Exception, err: + print "Error visiting", repr(object) + print err + traceback.print_exc() + # XXX hack + if hasattr(self, 'file'): + self.file.flush() + os._exit(1) + + def _dispatch(self, object): + assert isinstance(object, AST), repr(object) + klass = object.__class__ + meth = self.cache.get(klass) + if meth is None: + methname = "visit" + klass.__name__ + if self.skip: + meth = getattr(self, methname, None) + else: + meth = getattr(self, methname) + self.cache[klass] = meth + return meth + +class Check(VisitorBase): + + def __init__(self): + super(Check, self).__init__(skip=1) + self.cons = {} + self.errors = 0 + self.types = {} + + def visitModule(self, mod): + for dfn in mod.dfns: + self.visit(dfn) + + def visitType(self, type): + self.visit(type.value, str(type.name)) + + def visitSum(self, sum, name): + for t in sum.types: + self.visit(t, name) + + def visitConstructor(self, cons, name): + key = str(cons.name) + conflict = self.cons.get(key) + if conflict is None: + self.cons[key] = name + else: + print "Redefinition of constructor %s" % key + print "Defined in %s and %s" % (conflict, name) + self.errors += 1 + for f in cons.fields: + self.visit(f, key) + + def visitField(self, field, name): + key = str(field.type) + l = self.types.setdefault(key, []) + l.append(name) + + def visitProduct(self, prod, name): + for f in prod.fields: + self.visit(f, name) + +def check(mod): + v = Check() + v.visit(mod) + + for t in v.types: + if not mod.types.has_key(t) and not t in builtin_types: + v.errors += 1 + uses = ", ".join(v.types[t]) + print "Undefined type %s, used in %s" % (t, uses) + + return not v.errors + +def parse(file): + scanner = ASDLScanner() + parser = ASDLParser() + + buf = open(file).read() + tokens = scanner.tokenize(buf) + try: + return parser.parse(tokens) + except ASDLSyntaxError, err: + print err + lines = buf.split("\n") + print lines[err.lineno - 1] # lines starts at 0, files at 1 + +if __name__ == "__main__": + import glob + import sys + + if len(sys.argv) > 1: + files = sys.argv[1:] + else: + testdir = "tests" + files = glob.glob(testdir + "/*.asdl") + + for file in files: + print file + mod = parse(file) + print "module", mod.name + print len(mod.dfns), "definitions" + if not check(mod): + print "Check failed" + else: + for dfn in mod.dfns: + print dfn.type diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py new file mode 100755 index 0000000..7c6df4e --- /dev/null +++ b/Parser/asdl_c.py @@ -0,0 +1,621 @@ +#! /usr/bin/env python +"""Generate C code from an ASDL description.""" + +# TO DO +# handle fields that have a type but no name + +import os, sys, traceback + +import asdl + +TABSIZE = 8 +MAX_COL = 80 + +def get_c_type(name): + """Return a string for the C name of the type. + + This function special cases the default types provided by asdl: + identifier, string, int, bool. + """ + # XXX ack! need to figure out where Id is useful and where string + if isinstance(name, asdl.Id): + name = name.value + if name in asdl.builtin_types: + return name + else: + return "%s_ty" % name + +def reflow_lines(s, depth): + """Reflow the line s indented depth tabs. + + Return a sequence of lines where no line extends beyond MAX_COL + when properly indented. The first line is properly indented based + exclusively on depth * TABSIZE. All following lines -- these are + the reflowed lines generated by this function -- start at the same + column as the first character beyond the opening { in the first + line. + """ + size = MAX_COL - depth * TABSIZE + if len(s) < size: + return [s] + + lines = [] + cur = s + padding = "" + while len(cur) > size: + i = cur.rfind(' ', 0, size) + # XXX this should be fixed for real + if i == -1 and 'GeneratorExp' in cur: + i = size + 3 + assert i != -1, "Impossible line %d to reflow: %s" % (size, `s`) + lines.append(padding + cur[:i]) + if len(lines) == 1: + # find new size based on brace + j = cur.find('{', 0, i) + if j >= 0: + j += 2 # account for the brace and the space after it + size -= j + padding = " " * j + else: + j = cur.find('(', 0, i) + if j >= 0: + j += 1 # account for the paren (no space after it) + size -= j + padding = " " * j + cur = cur[i+1:] + else: + lines.append(padding + cur) + return lines + +def is_simple(sum): + """Return True if a sum is a simple. + + A sum is simple if its types have no fields, e.g. + unaryop = Invert | Not | UAdd | USub + """ + + for t in sum.types: + if t.fields: + return False + return True + +class EmitVisitor(asdl.VisitorBase): + """Visit that emits lines""" + + def __init__(self, file): + self.file = file + super(EmitVisitor, self).__init__() + + def emit(self, s, depth, reflow=1): + # XXX reflow long lines? + if reflow: + lines = reflow_lines(s, depth) + else: + lines = [s] + for line in lines: + line = (" " * TABSIZE * depth) + line + "\n" + self.file.write(line) + +class TypeDefVisitor(EmitVisitor): + def visitModule(self, mod): + for dfn in mod.dfns: + self.visit(dfn) + + def visitType(self, type, depth=0): + self.visit(type.value, type.name, depth) + + def visitSum(self, sum, name, depth): + if is_simple(sum): + self.simple_sum(sum, name, depth) + else: + self.sum_with_constructors(sum, name, depth) + + def simple_sum(self, sum, name, depth): + enum = [] + for i in range(len(sum.types)): + type = sum.types[i] + enum.append("%s=%d" % (type.name, i + 1)) + enums = ", ".join(enum) + ctype = get_c_type(name) + s = "typedef enum _%s { %s } %s;" % (name, enums, ctype) + self.emit(s, depth) + self.emit("", depth) + + def sum_with_constructors(self, sum, name, depth): + ctype = get_c_type(name) + s = "typedef struct _%(name)s *%(ctype)s;" % locals() + self.emit(s, depth) + self.emit("", depth) + + def visitProduct(self, product, name, depth): + ctype = get_c_type(name) + s = "typedef struct _%(name)s *%(ctype)s;" % locals() + self.emit(s, depth) + self.emit("", depth) + +class StructVisitor(EmitVisitor): + """Visitor to generate typdefs for AST.""" + + def visitModule(self, mod): + for dfn in mod.dfns: + self.visit(dfn) + + def visitType(self, type, depth=0): + self.visit(type.value, type.name, depth) + + def visitSum(self, sum, name, depth): + if not is_simple(sum): + self.sum_with_constructors(sum, name, depth) + + def sum_with_constructors(self, sum, name, depth): + def emit(s, depth=depth): + self.emit(s % sys._getframe(1).f_locals, depth) + enum = [] + for i in range(len(sum.types)): + type = sum.types[i] + enum.append("%s_kind=%d" % (type.name, i + 1)) + + emit("struct _%(name)s {") + emit("enum { " + ", ".join(enum) + " } kind;", depth + 1) + emit("union {", depth + 1) + for t in sum.types: + self.visit(t, depth + 2) + emit("} v;", depth + 1) + for field in sum.attributes: + # rudimentary attribute handling + type = str(field.type) + assert type in asdl.builtin_types, type + emit("%s %s;" % (type, field.name), depth + 1); + emit("};") + emit("") + + def visitConstructor(self, cons, depth): + if cons.fields: + self.emit("struct {", depth) + for f in cons.fields: + self.visit(f, depth + 1) + self.emit("} %s;" % cons.name, depth) + self.emit("", depth) + else: + # XXX not sure what I want here, nothing is probably fine + pass + + def visitField(self, field, depth): + # XXX need to lookup field.type, because it might be something + # like a builtin... + ctype = get_c_type(field.type) + name = field.name + if field.seq: + self.emit("asdl_seq *%(name)s;" % locals(), depth) + else: + self.emit("%(ctype)s %(name)s;" % locals(), depth) + + def visitProduct(self, product, name, depth): + self.emit("struct _%(name)s {" % locals(), depth) + for f in product.fields: + self.visit(f, depth + 1) + self.emit("};", depth) + self.emit("", depth) + +class PrototypeVisitor(EmitVisitor): + """Generate function prototypes for the .h file""" + + def visitModule(self, mod): + for dfn in mod.dfns: + self.visit(dfn) + + def visitType(self, type): + self.visit(type.value, type.name) + + def visitSum(self, sum, name): + if is_simple(sum): + pass # XXX + else: + for t in sum.types: + self.visit(t, name, sum.attributes) + + def get_args(self, fields): + """Return list of C argument into, one for each field. + + Argument info is 3-tuple of a C type, variable name, and flag + that is true if type can be NULL. + """ + args = [] + unnamed = {} + for f in fields: + if f.name is None: + name = f.type + c = unnamed[name] = unnamed.get(name, 0) + 1 + if c > 1: + name = "name%d" % (c - 1) + else: + name = f.name + # XXX should extend get_c_type() to handle this + if f.seq: + ctype = "asdl_seq *" + else: + ctype = get_c_type(f.type) + args.append((ctype, name, f.opt or f.seq)) + return args + + def visitConstructor(self, cons, type, attrs): + args = self.get_args(cons.fields) + attrs = self.get_args(attrs) + ctype = get_c_type(type) + self.emit_function(cons.name, ctype, args, attrs) + + def emit_function(self, name, ctype, args, attrs, union=1): + args = args + attrs + if args: + argstr = ", ".join(["%s %s" % (atype, aname) + for atype, aname, opt in args]) + else: + argstr = "void" + self.emit("%s %s(%s);" % (ctype, name, argstr), 0) + + def visitProduct(self, prod, name): + self.emit_function(name, get_c_type(name), + self.get_args(prod.fields), [], union=0) + +class FunctionVisitor(PrototypeVisitor): + """Visitor to generate constructor functions for AST.""" + + def emit_function(self, name, ctype, args, attrs, union=1): + def emit(s, depth=0, reflow=1): + self.emit(s, depth, reflow) + argstr = ", ".join(["%s %s" % (atype, aname) + for atype, aname, opt in args + attrs]) + self.emit("%s" % ctype, 0) + emit("%s(%s)" % (name, argstr)) + emit("{") + emit("%s p;" % ctype, 1) + for argtype, argname, opt in args: + # XXX hack alert: false is allowed for a bool + if not opt and not argtype == "bool": + emit("if (!%s) {" % argname, 1) + emit("PyErr_SetString(PyExc_ValueError,", 2) + msg = "field %s is required for %s" % (argname, name) + emit(' "%s");' % msg, + 2, reflow=0) + emit('return NULL;', 2) + emit('}', 1) + + emit("p = (%s)malloc(sizeof(*p));" % ctype, 1) + emit("if (!p) {", 1) + emit("PyErr_SetString(PyExc_MemoryError, \"no memory\");", 2) + emit("return NULL;", 2) + emit("}", 1) + if union: + self.emit_body_union(name, args, attrs) + else: + self.emit_body_struct(name, args, attrs) + emit("return p;", 1) + emit("}") + emit("") + + def emit_body_union(self, name, args, attrs): + def emit(s, depth=0, reflow=1): + self.emit(s, depth, reflow) + emit("p->kind = %s_kind;" % name, 1) + for argtype, argname, opt in args: + emit("p->v.%s.%s = %s;" % (name, argname, argname), 1) + for argtype, argname, opt in attrs: + emit("p->%s = %s;" % (argname, argname), 1) + + def emit_body_struct(self, name, args, attrs): + def emit(s, depth=0, reflow=1): + self.emit(s, depth, reflow) + for argtype, argname, opt in args: + emit("p->%s = %s;" % (argname, argname), 1) + assert not attrs + +class PickleVisitor(EmitVisitor): + + def visitModule(self, mod): + for dfn in mod.dfns: + self.visit(dfn) + + def visitType(self, type): + self.visit(type.value, type.name) + + def visitSum(self, sum, name): + pass + + def visitProduct(self, sum, name): + pass + + def visitConstructor(self, cons, name): + pass + + def visitField(self, sum): + pass + +class MarshalPrototypeVisitor(PickleVisitor): + + def prototype(self, sum, name): + ctype = get_c_type(name) + self.emit("int marshal_write_%s(PyObject **, int *, %s);" + % (name, ctype), 0) + + visitProduct = visitSum = prototype + +class FreePrototypeVisitor(PickleVisitor): + + def prototype(self, sum, name): + ctype = get_c_type(name) + self.emit("void free_%s(%s);" % (name, ctype), 0) + + visitProduct = visitSum = prototype + +_SPECIALIZED_SEQUENCES = ('stmt', 'expr') + +def find_sequence(fields, doing_specialization): + """Return True if any field uses a sequence.""" + for f in fields: + if f.seq: + if not doing_specialization: + return True + if str(f.type) not in _SPECIALIZED_SEQUENCES: + return True + return False + +def has_sequence(types, doing_specialization): + for t in types: + if find_sequence(t.fields, doing_specialization): + return True + return False + + +class StaticVisitor(PickleVisitor): + '''Very simple, always emit this static code''' + + CODE = '''static void +free_seq_exprs(asdl_seq *seq) +{ + int i, n; + n = asdl_seq_LEN(seq); + for (i = 0; i < n; i++) + free_expr((expr_ty)asdl_seq_GET(seq, i)); + asdl_seq_free(seq); +} + +static void +free_seq_stmts(asdl_seq *seq) +{ + int i, n; + n = asdl_seq_LEN(seq); + for (i = 0; i < n; i++) + free_stmt((stmt_ty)asdl_seq_GET(seq, i)); + asdl_seq_free(seq); +} +''' + + def visit(self, object): + self.emit(self.CODE, 0, reflow=False) + + +class FreeVisitor(PickleVisitor): + + def func_begin(self, name, has_seq): + ctype = get_c_type(name) + self.emit("void", 0) + self.emit("free_%s(%s o)" % (name, ctype), 0) + self.emit("{", 0) + if has_seq: + self.emit("int i, n;", 1) + self.emit("asdl_seq *seq;", 1) + self.emit('', 0) + self.emit('if (!o)', 1) + self.emit('return;', 2) + self.emit('', 0) + + def func_end(self): + self.emit("}", 0) + self.emit("", 0) + + def visitSum(self, sum, name): + has_seq = has_sequence(sum.types, True) + self.func_begin(name, has_seq) + if not is_simple(sum): + self.emit("switch (o->kind) {", 1) + for i in range(len(sum.types)): + t = sum.types[i] + self.visitConstructor(t, i + 1, name) + self.emit("}", 1) + self.emit("", 0) + self.emit("free(o);", 1) + self.func_end() + + def visitProduct(self, prod, name): + self.func_begin(name, find_sequence(prod.fields, True)) + for field in prod.fields: + self.visitField(field, name, 1, True) + self.emit("", 0) + self.emit("free(o);", 1) + self.func_end() + + def visitConstructor(self, cons, enum, name): + self.emit("case %s_kind:" % cons.name, 1) + for f in cons.fields: + self.visitField(f, cons.name, 2, False) + self.emit("break;", 2) + + def visitField(self, field, name, depth, product): + def emit(s, d): + self.emit(s, depth + d) + if product: + value = "o->%s" % field.name + else: + value = "o->v.%s.%s" % (name, field.name) + if field.seq: + self.emitSeq(field, value, depth, emit) + + # XXX need to know the simple types in advance, so that we + # don't call free_TYPE() for them. + + elif field.opt: + emit("if (%s) {" % value, 0) + self.free(field, value, depth + 1) + emit("}", 0) + else: + self.free(field, value, depth) + + def emitSeq(self, field, value, depth, emit): + # specialize for freeing sequences of statements and expressions + if str(field.type) in _SPECIALIZED_SEQUENCES: + c_code = "free_seq_%ss(%s);" % (field.type, value) + emit(c_code, 0) + else: + emit("seq = %s;" % value, 0) + emit("n = asdl_seq_LEN(seq);", 0) + emit("for (i = 0; i < n; i++)", 0) + self.free(field, "asdl_seq_GET(seq, i)", depth + 1) + emit("asdl_seq_free(seq);", 0) + + def free(self, field, value, depth): + if str(field.type) in ("identifier", "string", "object"): + ctype = get_c_type(field.type) + self.emit("Py_DECREF((%s)%s);" % (ctype, value), depth) + elif str(field.type) == "bool": + return + else: + ctype = get_c_type(field.type) + self.emit("free_%s((%s)%s);" % (field.type, ctype, value), depth) + + +class MarshalFunctionVisitor(PickleVisitor): + + def func_begin(self, name, has_seq): + ctype = get_c_type(name) + self.emit("int", 0) + self.emit("marshal_write_%s(PyObject **buf, int *off, %s o)" % + (name, ctype), 0) + self.emit("{", 0) + # XXX: add declaration of "int i;" properly + if has_seq or True: + self.emit("int i;", 1) # XXX only need it for sequences + + def func_end(self): + self.emit("return 1;", 1) + self.emit("}", 0) + self.emit("", 0) + + def visitSum(self, sum, name): + has_seq = has_sequence(sum.types, False) + self.func_begin(name, has_seq) + simple = is_simple(sum) + if simple: + self.emit("switch (o) {", 1) + else: + self.emit("switch (o->kind) {", 1) + for i in range(len(sum.types)): + t = sum.types[i] + self.visitConstructor(t, i + 1, name, simple) + self.emit("}", 1) + self.func_end() + + def visitProduct(self, prod, name): + self.func_begin(name, find_sequence(prod.fields, True)) + for field in prod.fields: + self.visitField(field, name, 1, 1) + self.func_end() + + def visitConstructor(self, cons, enum, name, simple): + if simple: + self.emit("case %s:" % cons.name, 1) + self.emit("marshal_write_int(buf, off, %d);" % enum, 2); + self.emit("break;", 2) + else: + self.emit("case %s_kind:" % cons.name, 1) + self.emit("marshal_write_int(buf, off, %d);" % enum, 2) + for f in cons.fields: + self.visitField(f, cons.name, 2, 0) + self.emit("break;", 2) + + def visitField(self, field, name, depth, product): + def emit(s, d): + self.emit(s, depth + d) + if product: + value = "o->%s" % field.name + else: + value = "o->v.%s.%s" % (name, field.name) + if field.seq: + emit("marshal_write_int(buf, off, asdl_seq_LEN(%s));" % value, 0) + emit("for (i = 0; i < asdl_seq_LEN(%s); i++) {" % value, 0) + emit("void *elt = asdl_seq_GET(%s, i);" % value, 1); + ctype = get_c_type(field.type); + emit("marshal_write_%s(buf, off, (%s)elt);" % (field.type, + ctype), 1) + emit("}", 0) + elif field.opt: + emit("if (%s) {" % value, 0) + emit("marshal_write_int(buf, off, 1);", 1) + emit("marshal_write_%s(buf, off, %s);" % (field.type, value), 1) + emit("}", 0) + emit("else {", 0) + emit("marshal_write_int(buf, off, 0);", 1) + emit("}", 0) + else: + emit("marshal_write_%s(buf, off, %s);" % (field.type, value), 0) + +class ChainOfVisitors: + def __init__(self, *visitors): + self.visitors = visitors + + def visit(self, object): + for v in self.visitors: + v.visit(object) + +def main(srcfile): + auto_gen_msg = '/* File automatically generated by %s */\n' % sys.argv[0] + mod = asdl.parse(srcfile) + if not asdl.check(mod): + sys.exit(1) + if INC_DIR: + p = "%s/%s-ast.h" % (INC_DIR, mod.name) + else: + p = "%s-ast.h" % mod.name + f = open(p, "wb") + print >> f, auto_gen_msg + print >> f, '#include "asdl.h"\n' + c = ChainOfVisitors(TypeDefVisitor(f), + StructVisitor(f), + PrototypeVisitor(f), + FreePrototypeVisitor(f), + MarshalPrototypeVisitor(f), + ) + c.visit(mod) + f.close() + + if SRC_DIR: + p = "%s/%s-ast.c" % (SRC_DIR, mod.name) + else: + p = "%s-ast.c" % mod.name + f = open(p, "wb") + print >> f, auto_gen_msg + print >> f, '#include "Python.h"' + print >> f, '#include "%s-ast.h"' % mod.name + print >> f + v = ChainOfVisitors(FunctionVisitor(f), + StaticVisitor(f), + FreeVisitor(f), + MarshalFunctionVisitor(f), + ) + v.visit(mod) + f.close() + +if __name__ == "__main__": + import sys + import getopt + + INC_DIR = '' + SRC_DIR = '' + opts, args = getopt.getopt(sys.argv[1:], "h:c:") + for o, v in opts: + if o == '-h': + INC_DIR = v + if o == '-c': + SRC_DIR = v + if len(args) != 1: + print "Must specify single input file" + main(args[0]) diff --git a/Parser/grammar.mak b/Parser/grammar.mak index a6f1abe..55f028f 100644 --- a/Parser/grammar.mak +++ b/Parser/grammar.mak @@ -15,7 +15,7 @@ # particular case --pragma in PC\pyconfig.h, which demands that # python23.lib get linked in). -LIBS= ..\PCbuild\python23.lib +LIBS= ..\PCbuild\python25.lib CFLAGS= /I ..\Include /I ..\PC /D MS_NO_COREDLL /D PGEN /MD diff --git a/Parser/parsetok.c b/Parser/parsetok.c index 1d25437..11d2232 100644 --- a/Parser/parsetok.c +++ b/Parser/parsetok.c @@ -21,7 +21,7 @@ static void initerr(perrdetail *err_ret, const char* filename); node * PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret) { - return PyParser_ParseStringFlags(s, g, start, err_ret, 0); + return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0); } node * @@ -56,7 +56,6 @@ PyParser_ParseStringFlagsFilename(const char *s, const char *filename, return parsetok(tok, g, start, err_ret, flags); } - /* Parse input coming from a file. Return error code, print some errors. */ node * @@ -210,7 +209,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, } static void -initerr(perrdetail *err_ret, const char* filename) +initerr(perrdetail *err_ret, const char *filename) { err_ret->error = E_OK; err_ret->filename = filename; diff --git a/Parser/spark.py b/Parser/spark.py new file mode 100644 index 0000000..00d9733 --- /dev/null +++ b/Parser/spark.py @@ -0,0 +1,840 @@ +# Copyright (c) 1998-2002 John Aycock +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +__version__ = 'SPARK-0.7 (pre-alpha-5)' + +import re +import sys +import string + +def _namelist(instance): + namelist, namedict, classlist = [], {}, [instance.__class__] + for c in classlist: + for b in c.__bases__: + classlist.append(b) + for name in c.__dict__.keys(): + if not namedict.has_key(name): + namelist.append(name) + namedict[name] = 1 + return namelist + +class GenericScanner: + def __init__(self, flags=0): + pattern = self.reflect() + self.re = re.compile(pattern, re.VERBOSE|flags) + + self.index2func = {} + for name, number in self.re.groupindex.items(): + self.index2func[number-1] = getattr(self, 't_' + name) + + def makeRE(self, name): + doc = getattr(self, name).__doc__ + rv = '(?P<%s>%s)' % (name[2:], doc) + return rv + + def reflect(self): + rv = [] + for name in _namelist(self): + if name[:2] == 't_' and name != 't_default': + rv.append(self.makeRE(name)) + + rv.append(self.makeRE('t_default')) + return string.join(rv, '|') + + def error(self, s, pos): + print "Lexical error at position %s" % pos + raise SystemExit + + def tokenize(self, s): + pos = 0 + n = len(s) + while pos < n: + m = self.re.match(s, pos) + if m is None: + self.error(s, pos) + + groups = m.groups() + for i in range(len(groups)): + if groups[i] and self.index2func.has_key(i): + self.index2func[i](groups[i]) + pos = m.end() + + def t_default(self, s): + r'( . | \n )+' + print "Specification error: unmatched input" + raise SystemExit + +# +# Extracted from GenericParser and made global so that [un]picking works. +# +class _State: + def __init__(self, stateno, items): + self.T, self.complete, self.items = [], [], items + self.stateno = stateno + +class GenericParser: + # + # An Earley parser, as per J. Earley, "An Efficient Context-Free + # Parsing Algorithm", CACM 13(2), pp. 94-102. Also J. C. Earley, + # "An Efficient Context-Free Parsing Algorithm", Ph.D. thesis, + # Carnegie-Mellon University, August 1968. New formulation of + # the parser according to J. Aycock, "Practical Earley Parsing + # and the SPARK Toolkit", Ph.D. thesis, University of Victoria, + # 2001, and J. Aycock and R. N. Horspool, "Practical Earley + # Parsing", unpublished paper, 2001. + # + + def __init__(self, start): + self.rules = {} + self.rule2func = {} + self.rule2name = {} + self.collectRules() + self.augment(start) + self.ruleschanged = 1 + + _NULLABLE = '\e_' + _START = 'START' + _BOF = '|-' + + # + # When pickling, take the time to generate the full state machine; + # some information is then extraneous, too. Unfortunately we + # can't save the rule2func map. + # + def __getstate__(self): + if self.ruleschanged: + # + # XXX - duplicated from parse() + # + self.computeNull() + self.newrules = {} + self.new2old = {} + self.makeNewRules() + self.ruleschanged = 0 + self.edges, self.cores = {}, {} + self.states = { 0: self.makeState0() } + self.makeState(0, self._BOF) + # + # XXX - should find a better way to do this.. + # + changes = 1 + while changes: + changes = 0 + for k, v in self.edges.items(): + if v is None: + state, sym = k + if self.states.has_key(state): + self.goto(state, sym) + changes = 1 + rv = self.__dict__.copy() + for s in self.states.values(): + del s.items + del rv['rule2func'] + del rv['nullable'] + del rv['cores'] + return rv + + def __setstate__(self, D): + self.rules = {} + self.rule2func = {} + self.rule2name = {} + self.collectRules() + start = D['rules'][self._START][0][1][1] # Blech. + self.augment(start) + D['rule2func'] = self.rule2func + D['makeSet'] = self.makeSet_fast + self.__dict__ = D + + # + # A hook for GenericASTBuilder and GenericASTMatcher. Mess + # thee not with this; nor shall thee toucheth the _preprocess + # argument to addRule. + # + def preprocess(self, rule, func): return rule, func + + def addRule(self, doc, func, _preprocess=1): + fn = func + rules = string.split(doc) + + index = [] + for i in range(len(rules)): + if rules[i] == '::=': + index.append(i-1) + index.append(len(rules)) + + for i in range(len(index)-1): + lhs = rules[index[i]] + rhs = rules[index[i]+2:index[i+1]] + rule = (lhs, tuple(rhs)) + + if _preprocess: + rule, fn = self.preprocess(rule, func) + + if self.rules.has_key(lhs): + self.rules[lhs].append(rule) + else: + self.rules[lhs] = [ rule ] + self.rule2func[rule] = fn + self.rule2name[rule] = func.__name__[2:] + self.ruleschanged = 1 + + def collectRules(self): + for name in _namelist(self): + if name[:2] == 'p_': + func = getattr(self, name) + doc = func.__doc__ + self.addRule(doc, func) + + def augment(self, start): + rule = '%s ::= %s %s' % (self._START, self._BOF, start) + self.addRule(rule, lambda args: args[1], 0) + + def computeNull(self): + self.nullable = {} + tbd = [] + + for rulelist in self.rules.values(): + lhs = rulelist[0][0] + self.nullable[lhs] = 0 + for rule in rulelist: + rhs = rule[1] + if len(rhs) == 0: + self.nullable[lhs] = 1 + continue + # + # We only need to consider rules which + # consist entirely of nonterminal symbols. + # This should be a savings on typical + # grammars. + # + for sym in rhs: + if not self.rules.has_key(sym): + break + else: + tbd.append(rule) + changes = 1 + while changes: + changes = 0 + for lhs, rhs in tbd: + if self.nullable[lhs]: + continue + for sym in rhs: + if not self.nullable[sym]: + break + else: + self.nullable[lhs] = 1 + changes = 1 + + def makeState0(self): + s0 = _State(0, []) + for rule in self.newrules[self._START]: + s0.items.append((rule, 0)) + return s0 + + def finalState(self, tokens): + # + # Yuck. + # + if len(self.newrules[self._START]) == 2 and len(tokens) == 0: + return 1 + start = self.rules[self._START][0][1][1] + return self.goto(1, start) + + def makeNewRules(self): + worklist = [] + for rulelist in self.rules.values(): + for rule in rulelist: + worklist.append((rule, 0, 1, rule)) + + for rule, i, candidate, oldrule in worklist: + lhs, rhs = rule + n = len(rhs) + while i < n: + sym = rhs[i] + if not self.rules.has_key(sym) or \ + not self.nullable[sym]: + candidate = 0 + i = i + 1 + continue + + newrhs = list(rhs) + newrhs[i] = self._NULLABLE+sym + newrule = (lhs, tuple(newrhs)) + worklist.append((newrule, i+1, + candidate, oldrule)) + candidate = 0 + i = i + 1 + else: + if candidate: + lhs = self._NULLABLE+lhs + rule = (lhs, rhs) + if self.newrules.has_key(lhs): + self.newrules[lhs].append(rule) + else: + self.newrules[lhs] = [ rule ] + self.new2old[rule] = oldrule + + def typestring(self, token): + return None + + def error(self, token): + print "Syntax error at or near `%s' token" % token + raise SystemExit + + def parse(self, tokens): + sets = [ [(1,0), (2,0)] ] + self.links = {} + + if self.ruleschanged: + self.computeNull() + self.newrules = {} + self.new2old = {} + self.makeNewRules() + self.ruleschanged = 0 + self.edges, self.cores = {}, {} + self.states = { 0: self.makeState0() } + self.makeState(0, self._BOF) + + for i in xrange(len(tokens)): + sets.append([]) + + if sets[i] == []: + break + self.makeSet(tokens[i], sets, i) + else: + sets.append([]) + self.makeSet(None, sets, len(tokens)) + + #_dump(tokens, sets, self.states) + + finalitem = (self.finalState(tokens), 0) + if finalitem not in sets[-2]: + if len(tokens) > 0: + self.error(tokens[i-1]) + else: + self.error(None) + + return self.buildTree(self._START, finalitem, + tokens, len(sets)-2) + + def isnullable(self, sym): + # + # For symbols in G_e only. If we weren't supporting 1.5, + # could just use sym.startswith(). + # + return self._NULLABLE == sym[0:len(self._NULLABLE)] + + def skip(self, (lhs, rhs), pos=0): + n = len(rhs) + while pos < n: + if not self.isnullable(rhs[pos]): + break + pos = pos + 1 + return pos + + def makeState(self, state, sym): + assert sym is not None + # + # Compute \epsilon-kernel state's core and see if + # it exists already. + # + kitems = [] + for rule, pos in self.states[state].items: + lhs, rhs = rule + if rhs[pos:pos+1] == (sym,): + kitems.append((rule, self.skip(rule, pos+1))) + core = kitems + + core.sort() + tcore = tuple(core) + if self.cores.has_key(tcore): + return self.cores[tcore] + # + # Nope, doesn't exist. Compute it and the associated + # \epsilon-nonkernel state together; we'll need it right away. + # + k = self.cores[tcore] = len(self.states) + K, NK = _State(k, kitems), _State(k+1, []) + self.states[k] = K + predicted = {} + + edges = self.edges + rules = self.newrules + for X in K, NK: + worklist = X.items + for item in worklist: + rule, pos = item + lhs, rhs = rule + if pos == len(rhs): + X.complete.append(rule) + continue + + nextSym = rhs[pos] + key = (X.stateno, nextSym) + if not rules.has_key(nextSym): + if not edges.has_key(key): + edges[key] = None + X.T.append(nextSym) + else: + edges[key] = None + if not predicted.has_key(nextSym): + predicted[nextSym] = 1 + for prule in rules[nextSym]: + ppos = self.skip(prule) + new = (prule, ppos) + NK.items.append(new) + # + # Problem: we know K needs generating, but we + # don't yet know about NK. Can't commit anything + # regarding NK to self.edges until we're sure. Should + # we delay committing on both K and NK to avoid this + # hacky code? This creates other problems.. + # + if X is K: + edges = {} + + if NK.items == []: + return k + + # + # Check for \epsilon-nonkernel's core. Unfortunately we + # need to know the entire set of predicted nonterminals + # to do this without accidentally duplicating states. + # + core = predicted.keys() + core.sort() + tcore = tuple(core) + if self.cores.has_key(tcore): + self.edges[(k, None)] = self.cores[tcore] + return k + + nk = self.cores[tcore] = self.edges[(k, None)] = NK.stateno + self.edges.update(edges) + self.states[nk] = NK + return k + + def goto(self, state, sym): + key = (state, sym) + if not self.edges.has_key(key): + # + # No transitions from state on sym. + # + return None + + rv = self.edges[key] + if rv is None: + # + # Target state isn't generated yet. Remedy this. + # + rv = self.makeState(state, sym) + self.edges[key] = rv + return rv + + def gotoT(self, state, t): + return [self.goto(state, t)] + + def gotoST(self, state, st): + rv = [] + for t in self.states[state].T: + if st == t: + rv.append(self.goto(state, t)) + return rv + + def add(self, set, item, i=None, predecessor=None, causal=None): + if predecessor is None: + if item not in set: + set.append(item) + else: + key = (item, i) + if item not in set: + self.links[key] = [] + set.append(item) + self.links[key].append((predecessor, causal)) + + def makeSet(self, token, sets, i): + cur, next = sets[i], sets[i+1] + + ttype = token is not None and self.typestring(token) or None + if ttype is not None: + fn, arg = self.gotoT, ttype + else: + fn, arg = self.gotoST, token + + for item in cur: + ptr = (item, i) + state, parent = item + add = fn(state, arg) + for k in add: + if k is not None: + self.add(next, (k, parent), i+1, ptr) + nk = self.goto(k, None) + if nk is not None: + self.add(next, (nk, i+1)) + + if parent == i: + continue + + for rule in self.states[state].complete: + lhs, rhs = rule + for pitem in sets[parent]: + pstate, pparent = pitem + k = self.goto(pstate, lhs) + if k is not None: + why = (item, i, rule) + pptr = (pitem, parent) + self.add(cur, (k, pparent), + i, pptr, why) + nk = self.goto(k, None) + if nk is not None: + self.add(cur, (nk, i)) + + def makeSet_fast(self, token, sets, i): + # + # Call *only* when the entire state machine has been built! + # It relies on self.edges being filled in completely, and + # then duplicates and inlines code to boost speed at the + # cost of extreme ugliness. + # + cur, next = sets[i], sets[i+1] + ttype = token is not None and self.typestring(token) or None + + for item in cur: + ptr = (item, i) + state, parent = item + if ttype is not None: + k = self.edges.get((state, ttype), None) + if k is not None: + #self.add(next, (k, parent), i+1, ptr) + #INLINED --v + new = (k, parent) + key = (new, i+1) + if new not in next: + self.links[key] = [] + next.append(new) + self.links[key].append((ptr, None)) + #INLINED --^ + #nk = self.goto(k, None) + nk = self.edges.get((k, None), None) + if nk is not None: + #self.add(next, (nk, i+1)) + #INLINED --v + new = (nk, i+1) + if new not in next: + next.append(new) + #INLINED --^ + else: + add = self.gotoST(state, token) + for k in add: + if k is not None: + self.add(next, (k, parent), i+1, ptr) + #nk = self.goto(k, None) + nk = self.edges.get((k, None), None) + if nk is not None: + self.add(next, (nk, i+1)) + + if parent == i: + continue + + for rule in self.states[state].complete: + lhs, rhs = rule + for pitem in sets[parent]: + pstate, pparent = pitem + #k = self.goto(pstate, lhs) + k = self.edges.get((pstate, lhs), None) + if k is not None: + why = (item, i, rule) + pptr = (pitem, parent) + #self.add(cur, (k, pparent), + # i, pptr, why) + #INLINED --v + new = (k, pparent) + key = (new, i) + if new not in cur: + self.links[key] = [] + cur.append(new) + self.links[key].append((pptr, why)) + #INLINED --^ + #nk = self.goto(k, None) + nk = self.edges.get((k, None), None) + if nk is not None: + #self.add(cur, (nk, i)) + #INLINED --v + new = (nk, i) + if new not in cur: + cur.append(new) + #INLINED --^ + + def predecessor(self, key, causal): + for p, c in self.links[key]: + if c == causal: + return p + assert 0 + + def causal(self, key): + links = self.links[key] + if len(links) == 1: + return links[0][1] + choices = [] + rule2cause = {} + for p, c in links: + rule = c[2] + choices.append(rule) + rule2cause[rule] = c + return rule2cause[self.ambiguity(choices)] + + def deriveEpsilon(self, nt): + if len(self.newrules[nt]) > 1: + rule = self.ambiguity(self.newrules[nt]) + else: + rule = self.newrules[nt][0] + #print rule + + rhs = rule[1] + attr = [None] * len(rhs) + + for i in range(len(rhs)-1, -1, -1): + attr[i] = self.deriveEpsilon(rhs[i]) + return self.rule2func[self.new2old[rule]](attr) + + def buildTree(self, nt, item, tokens, k): + state, parent = item + + choices = [] + for rule in self.states[state].complete: + if rule[0] == nt: + choices.append(rule) + rule = choices[0] + if len(choices) > 1: + rule = self.ambiguity(choices) + #print rule + + rhs = rule[1] + attr = [None] * len(rhs) + + for i in range(len(rhs)-1, -1, -1): + sym = rhs[i] + if not self.newrules.has_key(sym): + if sym != self._BOF: + attr[i] = tokens[k-1] + key = (item, k) + item, k = self.predecessor(key, None) + #elif self.isnullable(sym): + elif self._NULLABLE == sym[0:len(self._NULLABLE)]: + attr[i] = self.deriveEpsilon(sym) + else: + key = (item, k) + why = self.causal(key) + attr[i] = self.buildTree(sym, why[0], + tokens, why[1]) + item, k = self.predecessor(key, why) + return self.rule2func[self.new2old[rule]](attr) + + def ambiguity(self, rules): + # + # XXX - problem here and in collectRules() if the same rule + # appears in >1 method. Also undefined results if rules + # causing the ambiguity appear in the same method. + # + sortlist = [] + name2index = {} + for i in range(len(rules)): + lhs, rhs = rule = rules[i] + name = self.rule2name[self.new2old[rule]] + sortlist.append((len(rhs), name)) + name2index[name] = i + sortlist.sort() + list = map(lambda (a,b): b, sortlist) + return rules[name2index[self.resolve(list)]] + + def resolve(self, list): + # + # Resolve ambiguity in favor of the shortest RHS. + # Since we walk the tree from the top down, this + # should effectively resolve in favor of a "shift". + # + return list[0] + +# +# GenericASTBuilder automagically constructs a concrete/abstract syntax tree +# for a given input. The extra argument is a class (not an instance!) +# which supports the "__setslice__" and "__len__" methods. +# +# XXX - silently overrides any user code in methods. +# + +class GenericASTBuilder(GenericParser): + def __init__(self, AST, start): + GenericParser.__init__(self, start) + self.AST = AST + + def preprocess(self, rule, func): + rebind = lambda lhs, self=self: \ + lambda args, lhs=lhs, self=self: \ + self.buildASTNode(args, lhs) + lhs, rhs = rule + return rule, rebind(lhs) + + def buildASTNode(self, args, lhs): + children = [] + for arg in args: + if isinstance(arg, self.AST): + children.append(arg) + else: + children.append(self.terminal(arg)) + return self.nonterminal(lhs, children) + + def terminal(self, token): return token + + def nonterminal(self, type, args): + rv = self.AST(type) + rv[:len(args)] = args + return rv + +# +# GenericASTTraversal is a Visitor pattern according to Design Patterns. For +# each node it attempts to invoke the method n_<node type>, falling +# back onto the default() method if the n_* can't be found. The preorder +# traversal also looks for an exit hook named n_<node type>_exit (no default +# routine is called if it's not found). To prematurely halt traversal +# of a subtree, call the prune() method -- this only makes sense for a +# preorder traversal. Node type is determined via the typestring() method. +# + +class GenericASTTraversalPruningException: + pass + +class GenericASTTraversal: + def __init__(self, ast): + self.ast = ast + + def typestring(self, node): + return node.type + + def prune(self): + raise GenericASTTraversalPruningException + + def preorder(self, node=None): + if node is None: + node = self.ast + + try: + name = 'n_' + self.typestring(node) + if hasattr(self, name): + func = getattr(self, name) + func(node) + else: + self.default(node) + except GenericASTTraversalPruningException: + return + + for kid in node: + self.preorder(kid) + + name = name + '_exit' + if hasattr(self, name): + func = getattr(self, name) + func(node) + + def postorder(self, node=None): + if node is None: + node = self.ast + + for kid in node: + self.postorder(kid) + + name = 'n_' + self.typestring(node) + if hasattr(self, name): + func = getattr(self, name) + func(node) + else: + self.default(node) + + + def default(self, node): + pass + +# +# GenericASTMatcher. AST nodes must have "__getitem__" and "__cmp__" +# implemented. +# +# XXX - makes assumptions about how GenericParser walks the parse tree. +# + +class GenericASTMatcher(GenericParser): + def __init__(self, start, ast): + GenericParser.__init__(self, start) + self.ast = ast + + def preprocess(self, rule, func): + rebind = lambda func, self=self: \ + lambda args, func=func, self=self: \ + self.foundMatch(args, func) + lhs, rhs = rule + rhslist = list(rhs) + rhslist.reverse() + + return (lhs, tuple(rhslist)), rebind(func) + + def foundMatch(self, args, func): + func(args[-1]) + return args[-1] + + def match_r(self, node): + self.input.insert(0, node) + children = 0 + + for child in node: + if children == 0: + self.input.insert(0, '(') + children = children + 1 + self.match_r(child) + + if children > 0: + self.input.insert(0, ')') + + def match(self, ast=None): + if ast is None: + ast = self.ast + self.input = [] + + self.match_r(ast) + self.parse(self.input) + + def resolve(self, list): + # + # Resolve ambiguity in favor of the longest RHS. + # + return list[-1] + +def _dump(tokens, sets, states): + for i in range(len(sets)): + print 'set', i + for item in sets[i]: + print '\t', item + for (lhs, rhs), pos in states[item[0]].items: + print '\t\t', lhs, '::=', + print string.join(rhs[:pos]), + print '.', + print string.join(rhs[pos:]) + if i < len(tokens): + print + print 'token', str(tokens[i]) + print |