From 8b6323d3ef78042118c08703f26cb2adf741ea2e Mon Sep 17 00:00:00 2001 From: Jeremy Hylton Date: Fri, 4 Feb 2000 00:28:21 +0000 Subject: checking in initial weekend's work compile.py: ASTVisitor framework plus bits of a code generator that should be bug-for-buf compatible with compile.c misc.py: Set and Stack helpers test.py: a bit of simple sample code that compile.py will work on --- Lib/compiler/misc.py | 18 ++ Lib/compiler/pycodegen.py | 406 +++++++++++++++++++++++++++++++++++ Tools/compiler/compiler/misc.py | 18 ++ Tools/compiler/compiler/pycodegen.py | 406 +++++++++++++++++++++++++++++++++++ 4 files changed, 848 insertions(+) create mode 100644 Lib/compiler/misc.py create mode 100644 Lib/compiler/pycodegen.py create mode 100644 Tools/compiler/compiler/misc.py create mode 100644 Tools/compiler/compiler/pycodegen.py diff --git a/Lib/compiler/misc.py b/Lib/compiler/misc.py new file mode 100644 index 0000000..5a3e261 --- /dev/null +++ b/Lib/compiler/misc.py @@ -0,0 +1,18 @@ +class Set: + def __init__(self): + self.elts = {} + def add(self, elt): + self.elts[elt] = elt + def items(self): + return self.elts.keys() + def has_elt(self, elt): + return self.elts.has_key(elt) + +class Stack: + def __init__(self): + self.stack = [] + self.pop = self.stack.pop + def push(self, elt): + self.stack.append(elt) + def top(self): + return self.stack[-1] diff --git a/Lib/compiler/pycodegen.py b/Lib/compiler/pycodegen.py new file mode 100644 index 0000000..5b7b3fd --- /dev/null +++ b/Lib/compiler/pycodegen.py @@ -0,0 +1,406 @@ +"""Python bytecode generator + +Currently contains generic ASTVisitor code, a LocalNameFinder, and a +CodeGenerator. Eventually, this will get split into the ASTVisitor as +a generic tool and CodeGenerator as a specific tool. +""" + +from p2c import transformer, ast +import dis +import misc + +def parse(path): + f = open(path) + src = f.read() + f.close() + t = transformer.Transformer() + return t.parsesuite(src) + +def walk(tree, visitor): + w = ASTVisitor() + w.preorder(tree, visitor) + return w.visitor + +class ASTVisitor: + """Performs a depth-first walk of the AST + + The ASTVisitor will walk the AST, performing either a preorder or + postorder traversal depending on which method is called. + + methods: + preorder(tree, visitor) + postorder(tree, visitor) + tree: an instance of ast.Node + visitor: an instance with visitXXX methods + + The ASTVisitor is responsible for walking over the tree in the + correct order. For each node, it checks the visitor argument for + a method named 'visitNodeType' where NodeType is the name of the + node's class, e.g. Classdef. If the method exists, it is called + with the node as its sole argument. + + The visitor method for a particular node type can control how + child nodes are visited during a preorder walk. (It can't control + the order during a postorder walk, because it is called _after_ + the walk has occurred.) The ASTVisitor modifies the visitor + argument by adding a visit method to the visitor; this method can + be used to visit a particular child node. If the visitor method + returns a true value, the ASTVisitor will not traverse the child + nodes. + + XXX The interface for controlling the preorder walk needs to be + re-considered. The current interface is convenient for visitors + that mostly let the ASTVisitor do everything. For something like + a code generator, where you want to walk to occur in a specific + order, it's a pain to add "return 1" to the end of each method. + + XXX Perhaps I can use a postorder walk for the code generator? + """ + + VERBOSE = 0 + + def __init__(self): + self.node = None + + def preorder(self, tree, visitor): + """Do preorder walk of tree using visitor""" + self.visitor = visitor + visitor.visit = self._preorder + self._preorder(tree) + + def _preorder(self, node): + stop = self.dispatch(node) + if stop: + return + for child in node.getChildren(): + if isinstance(child, ast.Node): + self._preorder(child) + + def postorder(self, tree, visitor): + """Do preorder walk of tree using visitor""" + self.visitor = visitor + visitor.visit = self._postorder + self._postorder(tree) + + def _postorder(self, tree): + for child in node.getChildren(): + if isinstance(child, ast.Node): + self._preorder(child) + self.dispatch(node) + + def dispatch(self, node): + self.node = node + className = node.__class__.__name__ + meth = getattr(self.visitor, 'visit' + className, None) + if self.VERBOSE: + print "dispatch", className, (meth and meth.__name__ or '') + if meth: + return meth(node) + + +class CodeGenerator: + def __init__(self): + self.code = PythonVMCode() + self.locals = misc.Stack() + + def visitDiscard(self, node): + return 1 + + def visitModule(self, node): + lnf = walk(node.node, LocalNameFinder()) + self.locals.push(lnf.getLocals()) + + def visitFunction(self, node): + lnf = walk(node.code, LocalNameFinder(node.argnames)) + self.locals.push(lnf.getLocals()) + self.code.setLineNo(node.lineno) + print node.code + self.visit(node.code) + self.code.emit('LOAD_CONST', 'None') + self.code.emit('RETURN_VALUE') + + def visitCallFunc(self, node): + self.visit(node.node) + for arg in node.args: + self.visit(arg) + self.code.callFunction(len(node.args)) + return 1 + + def visitIf(self, node): + after = ForwardRef() + for test, suite in node.tests: + self.code.setLineNo(test.lineno) + self.visit(test) + dest = ForwardRef() + self.code.jumpIfFalse(dest) + self.code.popTop() + self.visit(suite) + self.code.jumpForward(after) + dest.bind(self.code.getCurInst()) + self.code.popTop() + if node.else_: + self.visit(node.else_) + after.bind(self.code.getCurInst()) + return 1 + + def visitCompare(self, node): + """Comment from compile.c follows: + + The following code is generated for all but the last + comparison in a chain: + + label: on stack: opcode: jump to: + + a + a, b DUP_TOP + a, b, b ROT_THREE + b, a, b COMPARE_OP + b, 0-or-1 JUMP_IF_FALSE L1 + b, 1 POP_TOP + b + + We are now ready to repeat this sequence for the next + comparison in the chain. + + For the last we generate: + + b + b, c COMPARE_OP + 0-or-1 + + If there were any jumps to L1 (i.e., there was more than one + comparison), we generate: + + 0-or-1 JUMP_FORWARD L2 + L1: b, 0 ROT_TWO + 0, b POP_TOP + 0 + L2: 0-or-1 + """ + self.visit(node.expr) + # if refs are never emitted, subsequent bind call has no effect + l1 = ForwardRef() + l2 = ForwardRef() + for op, code in node.ops[:-1]: + # emit every comparison except the last + self.visit(code) + self.code.dupTop() + self.code.rotThree() + self.code.compareOp(op) + self.code.jumpIfFalse(l1) + self.code.popTop() + if node.ops: + # emit the last comparison + op, code = node.ops[-1] + self.visit(code) + self.code.compareOp(op) + if len(node.ops) > 1: + self.code.jumpForward(l2) + l1.bind(self.code.getCurInst()) + self.code.rotTwo() + self.code.popTop() + l2.bind(self.code.getCurInst()) + return 1 + + def binaryOp(self, node, op): + self.visit(node.left) + self.visit(node.right) + self.code.emit(op) + return 1 + + def visitAdd(self, node): + return self.binaryOp(node, 'BINARY_ADD') + + def visitSub(self, node): + return self.binaryOp(node, 'BINARY_SUBTRACT') + + def visitMul(self, node): + return self.binaryOp(node, 'BINARY_MULTIPLY') + + def visitDiv(self, node): + return self.binaryOp(node, 'BINARY_DIVIDE') + + def visitName(self, node): + locals = self.locals.top() + if locals.has_elt(node.name): + self.code.loadFast(node.name) + else: + self.code.loadGlobal(node.name) + + def visitConst(self, node): + self.code.loadConst(node.value) + + def visitReturn(self, node): + self.code.setLineNo(node.lineno) + self.visit(node.value) + self.code.returnValue() + return 1 + + def visitRaise(self, node): + self.code.setLineNo(node.lineno) + n = 0 + if node.expr1: + self.visit(node.expr1) + n = n + 1 + if node.expr2: + self.visit(node.expr2) + n = n + 1 + if node.expr3: + self.visit(node.expr3) + n = n + 1 + self.code.raiseVarargs(n) + return 1 + + def visitPrint(self, node): + self.code.setLineNo(node.lineno) + for child in node.nodes: + self.visit(child) + self.code.emit('PRINT_ITEM') + return 1 + + def visitPrintnl(self, node): + self.visitPrint(node) + self.code.emit('PRINT_NEWLINE') + return 1 + + +class LocalNameFinder: + def __init__(self, names=()): + self.names = misc.Set() + for name in names: + self.names.add(name) + + def getLocals(self): + return self.names + + def visitFunction(self, node): + self.names.add(node.name) + return 1 + + def visitImport(self, node): + for name in node.names: + self.names.add(name) + + def visitFrom(self, node): + for name in node.names: + self.names.add(name) + + def visitClassdef(self, node): + self.names.add(node.name) + return 1 + + def visitAssName(self, node): + self.names.add(node.name) + +class Label: + def __init__(self, num): + self.num = num + def __repr__(self): + return "Label(%d)" % self.num + +class ForwardRef: + count = 0 + + def __init__(self, id=None, val=None): + if id is None: + id = ForwardRef.count + ForwardRef.count = ForwardRef.count + 1 + self.id = id + self.val = val + + def __repr__(self): + if self.val: + return "ForwardRef(val=%d)" % self.val + else: + return "ForwardRef(id=%d)" % self.id + + def bind(self, inst): + self.val = inst + + def resolve(self): + return self.val + +class PythonVMCode: + def __init__(self): + self.insts = [] + + def emit(self, *args): + print "emit", args + self.insts.append(args) + + def getCurInst(self): + return len(self.insts) + + def getNextInst(self): + return len(self.insts) + 1 + + def convert(self): + """Convert human-readable names to real bytecode""" + pass + + opnum = {} + for num in range(len(dis.opname)): + opnum[dis.opname[num]] = num + + # the interface below here seemed good at first. upon real use, + # it seems redundant to add a function for each opcode, + # particularly because the method and opcode basically have the + # same name. + + def setLineNo(self, num): + self.emit('SET_LINENO', num) + + def popTop(self): + self.emit('POP_TOP') + + def dupTop(self): + self.emit('DUP_TOP') + + def rotTwo(self): + self.emit('ROT_TWO') + + def rotThree(self): + self.emit('ROT_THREE') + + def jumpIfFalse(self, dest): + self.emit('JUMP_IF_FALSE', dest) + + def loadFast(self, name): + self.emit('LOAD_FAST', name) + + def loadGlobal(self, name): + self.emit('LOAD_GLOBAL', name) + + def binaryAdd(self): + self.emit('BINARY_ADD') + + def compareOp(self, op): + self.emit('COMPARE_OP', op) + + def loadConst(self, val): + self.emit('LOAD_CONST', val) + + def returnValue(self): + self.emit('RETURN_VALUE') + + def jumpForward(self, dest): + self.emit('JUMP_FORWARD', dest) + + def raiseVarargs(self, num): + self.emit('RAISE_VARARGS', num) + + def callFunction(self, num): + self.emit('CALL_FUNCTION', num) + +if __name__ == "__main__": + tree = parse('test.py') + cg = CodeGenerator() + ASTVisitor.VERBOSE = 1 + w = walk(tree, cg) + w.VERBOSE = 1 + for i in range(len(cg.code.insts)): + inst = cg.code.insts[i] + if inst[0] == 'SET_LINENO': + print + print "%4d" % i, inst + diff --git a/Tools/compiler/compiler/misc.py b/Tools/compiler/compiler/misc.py new file mode 100644 index 0000000..5a3e261 --- /dev/null +++ b/Tools/compiler/compiler/misc.py @@ -0,0 +1,18 @@ +class Set: + def __init__(self): + self.elts = {} + def add(self, elt): + self.elts[elt] = elt + def items(self): + return self.elts.keys() + def has_elt(self, elt): + return self.elts.has_key(elt) + +class Stack: + def __init__(self): + self.stack = [] + self.pop = self.stack.pop + def push(self, elt): + self.stack.append(elt) + def top(self): + return self.stack[-1] diff --git a/Tools/compiler/compiler/pycodegen.py b/Tools/compiler/compiler/pycodegen.py new file mode 100644 index 0000000..5b7b3fd --- /dev/null +++ b/Tools/compiler/compiler/pycodegen.py @@ -0,0 +1,406 @@ +"""Python bytecode generator + +Currently contains generic ASTVisitor code, a LocalNameFinder, and a +CodeGenerator. Eventually, this will get split into the ASTVisitor as +a generic tool and CodeGenerator as a specific tool. +""" + +from p2c import transformer, ast +import dis +import misc + +def parse(path): + f = open(path) + src = f.read() + f.close() + t = transformer.Transformer() + return t.parsesuite(src) + +def walk(tree, visitor): + w = ASTVisitor() + w.preorder(tree, visitor) + return w.visitor + +class ASTVisitor: + """Performs a depth-first walk of the AST + + The ASTVisitor will walk the AST, performing either a preorder or + postorder traversal depending on which method is called. + + methods: + preorder(tree, visitor) + postorder(tree, visitor) + tree: an instance of ast.Node + visitor: an instance with visitXXX methods + + The ASTVisitor is responsible for walking over the tree in the + correct order. For each node, it checks the visitor argument for + a method named 'visitNodeType' where NodeType is the name of the + node's class, e.g. Classdef. If the method exists, it is called + with the node as its sole argument. + + The visitor method for a particular node type can control how + child nodes are visited during a preorder walk. (It can't control + the order during a postorder walk, because it is called _after_ + the walk has occurred.) The ASTVisitor modifies the visitor + argument by adding a visit method to the visitor; this method can + be used to visit a particular child node. If the visitor method + returns a true value, the ASTVisitor will not traverse the child + nodes. + + XXX The interface for controlling the preorder walk needs to be + re-considered. The current interface is convenient for visitors + that mostly let the ASTVisitor do everything. For something like + a code generator, where you want to walk to occur in a specific + order, it's a pain to add "return 1" to the end of each method. + + XXX Perhaps I can use a postorder walk for the code generator? + """ + + VERBOSE = 0 + + def __init__(self): + self.node = None + + def preorder(self, tree, visitor): + """Do preorder walk of tree using visitor""" + self.visitor = visitor + visitor.visit = self._preorder + self._preorder(tree) + + def _preorder(self, node): + stop = self.dispatch(node) + if stop: + return + for child in node.getChildren(): + if isinstance(child, ast.Node): + self._preorder(child) + + def postorder(self, tree, visitor): + """Do preorder walk of tree using visitor""" + self.visitor = visitor + visitor.visit = self._postorder + self._postorder(tree) + + def _postorder(self, tree): + for child in node.getChildren(): + if isinstance(child, ast.Node): + self._preorder(child) + self.dispatch(node) + + def dispatch(self, node): + self.node = node + className = node.__class__.__name__ + meth = getattr(self.visitor, 'visit' + className, None) + if self.VERBOSE: + print "dispatch", className, (meth and meth.__name__ or '') + if meth: + return meth(node) + + +class CodeGenerator: + def __init__(self): + self.code = PythonVMCode() + self.locals = misc.Stack() + + def visitDiscard(self, node): + return 1 + + def visitModule(self, node): + lnf = walk(node.node, LocalNameFinder()) + self.locals.push(lnf.getLocals()) + + def visitFunction(self, node): + lnf = walk(node.code, LocalNameFinder(node.argnames)) + self.locals.push(lnf.getLocals()) + self.code.setLineNo(node.lineno) + print node.code + self.visit(node.code) + self.code.emit('LOAD_CONST', 'None') + self.code.emit('RETURN_VALUE') + + def visitCallFunc(self, node): + self.visit(node.node) + for arg in node.args: + self.visit(arg) + self.code.callFunction(len(node.args)) + return 1 + + def visitIf(self, node): + after = ForwardRef() + for test, suite in node.tests: + self.code.setLineNo(test.lineno) + self.visit(test) + dest = ForwardRef() + self.code.jumpIfFalse(dest) + self.code.popTop() + self.visit(suite) + self.code.jumpForward(after) + dest.bind(self.code.getCurInst()) + self.code.popTop() + if node.else_: + self.visit(node.else_) + after.bind(self.code.getCurInst()) + return 1 + + def visitCompare(self, node): + """Comment from compile.c follows: + + The following code is generated for all but the last + comparison in a chain: + + label: on stack: opcode: jump to: + + a + a, b DUP_TOP + a, b, b ROT_THREE + b, a, b COMPARE_OP + b, 0-or-1 JUMP_IF_FALSE L1 + b, 1 POP_TOP + b + + We are now ready to repeat this sequence for the next + comparison in the chain. + + For the last we generate: + + b + b, c COMPARE_OP + 0-or-1 + + If there were any jumps to L1 (i.e., there was more than one + comparison), we generate: + + 0-or-1 JUMP_FORWARD L2 + L1: b, 0 ROT_TWO + 0, b POP_TOP + 0 + L2: 0-or-1 + """ + self.visit(node.expr) + # if refs are never emitted, subsequent bind call has no effect + l1 = ForwardRef() + l2 = ForwardRef() + for op, code in node.ops[:-1]: + # emit every comparison except the last + self.visit(code) + self.code.dupTop() + self.code.rotThree() + self.code.compareOp(op) + self.code.jumpIfFalse(l1) + self.code.popTop() + if node.ops: + # emit the last comparison + op, code = node.ops[-1] + self.visit(code) + self.code.compareOp(op) + if len(node.ops) > 1: + self.code.jumpForward(l2) + l1.bind(self.code.getCurInst()) + self.code.rotTwo() + self.code.popTop() + l2.bind(self.code.getCurInst()) + return 1 + + def binaryOp(self, node, op): + self.visit(node.left) + self.visit(node.right) + self.code.emit(op) + return 1 + + def visitAdd(self, node): + return self.binaryOp(node, 'BINARY_ADD') + + def visitSub(self, node): + return self.binaryOp(node, 'BINARY_SUBTRACT') + + def visitMul(self, node): + return self.binaryOp(node, 'BINARY_MULTIPLY') + + def visitDiv(self, node): + return self.binaryOp(node, 'BINARY_DIVIDE') + + def visitName(self, node): + locals = self.locals.top() + if locals.has_elt(node.name): + self.code.loadFast(node.name) + else: + self.code.loadGlobal(node.name) + + def visitConst(self, node): + self.code.loadConst(node.value) + + def visitReturn(self, node): + self.code.setLineNo(node.lineno) + self.visit(node.value) + self.code.returnValue() + return 1 + + def visitRaise(self, node): + self.code.setLineNo(node.lineno) + n = 0 + if node.expr1: + self.visit(node.expr1) + n = n + 1 + if node.expr2: + self.visit(node.expr2) + n = n + 1 + if node.expr3: + self.visit(node.expr3) + n = n + 1 + self.code.raiseVarargs(n) + return 1 + + def visitPrint(self, node): + self.code.setLineNo(node.lineno) + for child in node.nodes: + self.visit(child) + self.code.emit('PRINT_ITEM') + return 1 + + def visitPrintnl(self, node): + self.visitPrint(node) + self.code.emit('PRINT_NEWLINE') + return 1 + + +class LocalNameFinder: + def __init__(self, names=()): + self.names = misc.Set() + for name in names: + self.names.add(name) + + def getLocals(self): + return self.names + + def visitFunction(self, node): + self.names.add(node.name) + return 1 + + def visitImport(self, node): + for name in node.names: + self.names.add(name) + + def visitFrom(self, node): + for name in node.names: + self.names.add(name) + + def visitClassdef(self, node): + self.names.add(node.name) + return 1 + + def visitAssName(self, node): + self.names.add(node.name) + +class Label: + def __init__(self, num): + self.num = num + def __repr__(self): + return "Label(%d)" % self.num + +class ForwardRef: + count = 0 + + def __init__(self, id=None, val=None): + if id is None: + id = ForwardRef.count + ForwardRef.count = ForwardRef.count + 1 + self.id = id + self.val = val + + def __repr__(self): + if self.val: + return "ForwardRef(val=%d)" % self.val + else: + return "ForwardRef(id=%d)" % self.id + + def bind(self, inst): + self.val = inst + + def resolve(self): + return self.val + +class PythonVMCode: + def __init__(self): + self.insts = [] + + def emit(self, *args): + print "emit", args + self.insts.append(args) + + def getCurInst(self): + return len(self.insts) + + def getNextInst(self): + return len(self.insts) + 1 + + def convert(self): + """Convert human-readable names to real bytecode""" + pass + + opnum = {} + for num in range(len(dis.opname)): + opnum[dis.opname[num]] = num + + # the interface below here seemed good at first. upon real use, + # it seems redundant to add a function for each opcode, + # particularly because the method and opcode basically have the + # same name. + + def setLineNo(self, num): + self.emit('SET_LINENO', num) + + def popTop(self): + self.emit('POP_TOP') + + def dupTop(self): + self.emit('DUP_TOP') + + def rotTwo(self): + self.emit('ROT_TWO') + + def rotThree(self): + self.emit('ROT_THREE') + + def jumpIfFalse(self, dest): + self.emit('JUMP_IF_FALSE', dest) + + def loadFast(self, name): + self.emit('LOAD_FAST', name) + + def loadGlobal(self, name): + self.emit('LOAD_GLOBAL', name) + + def binaryAdd(self): + self.emit('BINARY_ADD') + + def compareOp(self, op): + self.emit('COMPARE_OP', op) + + def loadConst(self, val): + self.emit('LOAD_CONST', val) + + def returnValue(self): + self.emit('RETURN_VALUE') + + def jumpForward(self, dest): + self.emit('JUMP_FORWARD', dest) + + def raiseVarargs(self, num): + self.emit('RAISE_VARARGS', num) + + def callFunction(self, num): + self.emit('CALL_FUNCTION', num) + +if __name__ == "__main__": + tree = parse('test.py') + cg = CodeGenerator() + ASTVisitor.VERBOSE = 1 + w = walk(tree, cg) + w.VERBOSE = 1 + for i in range(len(cg.code.insts)): + inst = cg.code.insts[i] + if inst[0] == 'SET_LINENO': + print + print "%4d" % i, inst + -- cgit v0.12