From a50581228ebf9e9f1008022bdef735fb4c722705 Mon Sep 17 00:00:00 2001 From: Jeremy Hylton Date: Mon, 14 Feb 2000 14:14:29 +0000 Subject: split compile.py into two files add StackDepthFinder (and remove push/pop from CodeGen) add several nodes, including Ellipsis, Bit&|^, Exec --- Lib/compiler/pyassem.py | 437 +++++++++++++++++++++ Lib/compiler/pycodegen.py | 722 +++++++++-------------------------- Tools/compiler/compiler/pyassem.py | 437 +++++++++++++++++++++ Tools/compiler/compiler/pycodegen.py | 722 +++++++++-------------------------- 4 files changed, 1246 insertions(+), 1072 deletions(-) create mode 100644 Lib/compiler/pyassem.py create mode 100644 Tools/compiler/compiler/pyassem.py diff --git a/Lib/compiler/pyassem.py b/Lib/compiler/pyassem.py new file mode 100644 index 0000000..047836b --- /dev/null +++ b/Lib/compiler/pyassem.py @@ -0,0 +1,437 @@ +"""Assembler for Python bytecode + +The new module is used to create the code object. The following +attribute definitions are included from the reference manual: + +co_name gives the function name +co_argcount is the number of positional arguments (including + arguments with default values) +co_nlocals is the number of local variables used by the function + (including arguments) +co_varnames is a tuple containing the names of the local variables + (starting with the argument names) +co_code is a string representing the sequence of bytecode instructions +co_consts is a tuple containing the literals used by the bytecode +co_names is a tuple containing the names used by the bytecode +co_filename is the filename from which the code was compiled +co_firstlineno is the first line number of the function +co_lnotab is a string encoding the mapping from byte code offsets + to line numbers. see LineAddrTable below. +co_stacksize is the required stack size (including local variables) +co_flags is an integer encoding a number of flags for the + interpreter. There are four flags: + CO_OPTIMIZED -- uses load fast + CO_NEWLOCALS -- everything? + CO_VARARGS -- use *args + CO_VARKEYWORDS -- uses **args + +If a code object represents a function, the first item in co_consts is +the documentation string of the function, or None if undefined. +""" + +import sys +import dis +import new +import string + +import misc + +# flags for code objects +CO_OPTIMIZED = 0x0001 +CO_NEWLOCALS = 0x0002 +CO_VARARGS = 0x0004 +CO_VARKEYWORDS = 0x0008 + +class PyAssembler: + """Creates Python code objects + """ + + # XXX this class needs to major refactoring + + def __init__(self, args=(), name='?', filename='', + docstring=None): + # XXX why is the default value for flags 3? + self.insts = [] + # used by makeCodeObject + self.argcount = len(args) + self.code = '' + self.consts = [docstring] + self.filename = filename + self.flags = CO_NEWLOCALS + self.name = name + self.names = [] + self.varnames = list(args) or [] + # lnotab support + self.firstlineno = 0 + self.lastlineno = 0 + self.last_addr = 0 + self.lnotab = '' + + def __repr__(self): + return "" % len(self.insts) + + def setFlags(self, val): + """XXX for module's function""" + self.flags = val + + def setOptimized(self): + self.flags = self.flags | CO_OPTIMIZED + + def setVarArgs(self): + self.flags = self.flags | CO_VARARGS + + def setKWArgs(self): + self.flags = self.flags | CO_VARKEYWORDS + + def getCurInst(self): + return len(self.insts) + + def getNextInst(self): + return len(self.insts) + 1 + + def dump(self, io=sys.stdout): + i = 0 + for inst in self.insts: + if inst[0] == 'SET_LINENO': + io.write("\n") + io.write(" %3d " % i) + if len(inst) == 1: + io.write("%s\n" % inst) + else: + io.write("%-15.15s\t%s\n" % inst) + i = i + 1 + + def makeCodeObject(self): + """Make a Python code object + + This creates a Python code object using the new module. This + seems simpler than reverse-engineering the way marshal dumps + code objects into .pyc files. One of the key difficulties is + figuring out how to layout references to code objects that + appear on the VM stack; e.g. + 3 SET_LINENO 1 + 6 LOAD_CONST 0 ( 0 or line > 0: + # write the values in 1-byte chunks that sum + # to desired value + trunc_addr = addr + trunc_line = line + if trunc_addr > 255: + trunc_addr = 255 + if trunc_line > 255: + trunc_line = 255 + self.lnotab.append(trunc_addr) + self.lnotab.append(trunc_line) + addr = addr - trunc_addr + line = line - trunc_line + self.lastline = lineno + self.lastoff = self.codeOffset + + def getCode(self): + return string.join(self.code, '') + + def getTable(self): + return string.join(map(chr, self.lnotab), '') + +class StackRef: + """Manage stack locations for jumps, loops, etc.""" + count = 0 + + def __init__(self, id=None, val=None): + if id is None: + id = StackRef.count + StackRef.count = StackRef.count + 1 + self.id = id + self.val = val + + def __repr__(self): + if self.val: + return "StackRef(val=%d)" % self.val + else: + return "StackRef(id=%d)" % self.id + + def bind(self, inst): + self.val = inst + + def resolve(self): + if self.val is None: + print "UNRESOLVE REF", self + return 0 + return self.val + +class StackDepthTracker: + # XXX need to keep track of stack depth on jumps + + def findDepth(self, insts): + depth = 0 + maxDepth = 0 + for i in insts: + opname = i[0] + delta = self.effect.get(opname, 0) + if delta > 1: + depth = depth + delta + elif delta < 0: + if depth > maxDepth: + maxDepth = depth + depth = depth + delta + else: + if depth > maxDepth: + maxDepth = depth + # now check patterns + for pat, delta in self.patterns: + if opname[:len(pat)] == pat: + depth = depth + delta + break + # if we still haven't found a match + if delta == 0: + meth = getattr(self, opname) + depth = depth + meth(i[1]) + if depth < 0: + depth = 0 + return maxDepth + + effect = { + 'POP_TOP': -1, + 'DUP_TOP': 1, + 'SLICE+1': -1, + 'SLICE+2': -1, + 'SLICE+3': -2, + 'STORE_SLICE+0': -1, + 'STORE_SLICE+1': -2, + 'STORE_SLICE+2': -2, + 'STORE_SLICE+3': -3, + 'DELETE_SLICE+0': -1, + 'DELETE_SLICE+1': -2, + 'DELETE_SLICE+2': -2, + 'DELETE_SLICE+3': -3, + 'STORE_SUBSCR': -3, + 'DELETE_SUBSCR': -2, + # PRINT_EXPR? + 'PRINT_ITEM': -1, + 'LOAD_LOCALS': 1, + 'RETURN_VALUE': -1, + 'EXEC_STMT': -2, + 'BUILD_CLASS': -2, + 'STORE_NAME': -1, + 'STORE_ATTR': -2, + 'DELETE_ATTR': -1, + 'STORE_GLOBAL': -1, + 'BUILD_MAP': 1, + 'COMPARE_OP': -1, + 'STORE_FAST': -1, + } + # use pattern match + patterns = [ + ('BINARY_', -1), + ('LOAD_', 1), + ('IMPORT_', 1), + ] + # special cases + + #: UNPACK_TUPLE, UNPACK_LIST, BUILD_TUPLE, + # BUILD_LIST, CALL_FUNCTION, MAKE_FUNCTION, BUILD_SLICE + def UNPACK_TUPLE(self, count): + return count + def UNPACK_LIST(self, count): + return count + def BUILD_TUPLE(self, count): + return -count + def BUILD_LIST(self, count): + return -count + def CALL_FUNCTION(self, argc): + hi, lo = divmod(argc, 256) + return lo + hi * 2 + def MAKE_FUNCTION(self, argc): + return -argc + def BUILD_SLICE(self, argc): + if argc == 2: + return -1 + elif argc == 3: + return -2 + +findDepth = StackDepthTracker().findDepth diff --git a/Lib/compiler/pycodegen.py b/Lib/compiler/pycodegen.py index 8599e6b..ac93348 100644 --- a/Lib/compiler/pycodegen.py +++ b/Lib/compiler/pycodegen.py @@ -6,6 +6,7 @@ a generic tool and CodeGenerator as a specific tool. """ from p2c import transformer, ast +from pyassem import StackRef, PyAssembler import dis import misc import marshal @@ -149,14 +150,25 @@ class ExampleASTVisitor(ASTVisitor): print class CodeGenerator: - # XXX this should be combined with PythonVMCode. there is no - # clear way to split the functionality into two classes. + """TODO + + EmptyNode + Exec + Invert + LeftShift + Power + RightShift + Sliceobj + Tryexcept + Tryfinally + """ OPTIMIZED = 1 + # XXX should clean up initialization and generateXXX funcs def __init__(self, filename=""): self.filename = filename - self.code = PythonVMCode() + self.code = PyAssembler() self.code.setFlags(0) self.locals = misc.Stack() self.loops = misc.Stack() @@ -164,11 +176,15 @@ class CodeGenerator: self.curStack = 0 self.maxStack = 0 + def emit(self, *args): + # XXX could just use self.emit = self.code.emit + apply(self.code.emit, args) + def _generateFunctionOrLambdaCode(self, func): self.name = func.name self.filename = filename args = func.argnames - self.code = PythonVMCode(args=args, name=func.name, + self.code = PyAssembler(args=args, name=func.name, filename=filename) self.namespace = self.OPTIMIZED if func.varargs: @@ -177,38 +193,34 @@ class CodeGenerator: self.code.setKWArgs() lnf = walk(func.code, LocalNameFinder(args), 0) self.locals.push(lnf.getLocals()) - self.code.setLineNo(func.lineno) + self.emit('SET_LINENO', func.lineno) walk(func.code, self) def generateFunctionCode(self, func): """Generate code for a function body""" self._generateFunctionOrLambdaCode(func) - self.code.emit('LOAD_CONST', None) - self.code.emit('RETURN_VALUE') + self.emit('LOAD_CONST', None) + self.emit('RETURN_VALUE') def generateLambdaCode(self, func): self._generateFunctionOrLambdaCode(func) - self.code.emit('RETURN_VALUE') + self.emit('RETURN_VALUE') def generateClassCode(self, klass): - self.code = PythonVMCode(name=klass.name, + self.code = PyAssembler(name=klass.name, filename=filename) - self.code.setLineNo(klass.lineno) + self.emit('SET_LINENO', klass.lineno) lnf = walk(klass.code, LocalNameFinder(), 0) self.locals.push(lnf.getLocals()) walk(klass.code, self) - self.code.emit('LOAD_LOCALS') - self.code.emit('RETURN_VALUE') + self.emit('LOAD_LOCALS') + self.emit('RETURN_VALUE') - def emit(self): - """Create a Python code object - - XXX It is confusing that this method isn't related to the - method named emit in the PythonVMCode. - """ + def asConst(self): + """Create a Python code object.""" if self.namespace == self.OPTIMIZED: self.code.setOptimized() - return self.code.makeCodeObject(self.maxStack) + return self.code.makeCodeObject() def isLocalName(self, name): return self.locals.top().has_elt(name) @@ -216,11 +228,11 @@ class CodeGenerator: def _nameOp(self, prefix, name): if self.isLocalName(name): if self.namespace == self.OPTIMIZED: - self.code.emit(prefix + '_FAST', name) + self.emit(prefix + '_FAST', name) else: - self.code.emit(prefix + '_NAME', name) + self.emit(prefix + '_NAME', name) else: - self.code.emit(prefix + '_GLOBAL', name) + self.emit(prefix + '_GLOBAL', name) def storeName(self, name): self._nameOp('STORE', name) @@ -231,21 +243,6 @@ class CodeGenerator: def delName(self, name): self._nameOp('DELETE', name) - def push(self, n): - self.curStack = self.curStack + n - if self.curStack > self.maxStack: - self.maxStack = self.curStack - - def pop(self, n): - if n >= self.curStack: - self.curStack = self.curStack - n - else: - self.curStack = 0 - - def assertStackEmpty(self): - if self.curStack != 0: - print "warning: stack should be empty" - def visitNULL(self, node): """Method exists only to stop warning in -v mode""" pass @@ -255,46 +252,45 @@ class CodeGenerator: def visitDiscard(self, node): self.visit(node.expr) - self.code.emit('POP_TOP') - self.pop(1) + self.emit('POP_TOP') return 1 def visitPass(self, node): - self.code.setLineNo(node.lineno) + self.emit('SET_LINENO', node.lineno) def visitModule(self, node): lnf = walk(node.node, LocalNameFinder(), 0) self.locals.push(lnf.getLocals()) self.visit(node.node) - self.code.emit('LOAD_CONST', None) - self.code.emit('RETURN_VALUE') + self.emit('LOAD_CONST', None) + self.emit('RETURN_VALUE') return 1 def visitImport(self, node): - self.code.setLineNo(node.lineno) + self.emit('SET_LINENO', node.lineno) for name in node.names: - self.code.emit('IMPORT_NAME', name) + self.emit('IMPORT_NAME', name) self.storeName(name) def visitFrom(self, node): - self.code.setLineNo(node.lineno) - self.code.emit('IMPORT_NAME', node.modname) + self.emit('SET_LINENO', node.lineno) + self.emit('IMPORT_NAME', node.modname) for name in node.names: - self.code.emit('IMPORT_FROM', name) - self.code.emit('POP_TOP') + self.emit('IMPORT_FROM', name) + self.emit('POP_TOP') def visitClassdef(self, node): - self.code.emit('SET_LINENO', node.lineno) - self.code.emit('LOAD_CONST', node.name) + self.emit('SET_LINENO', node.lineno) + self.emit('LOAD_CONST', node.name) for base in node.bases: self.visit(base) - self.code.emit('BUILD_TUPLE', len(node.bases)) + self.emit('BUILD_TUPLE', len(node.bases)) classBody = CodeGenerator(self.filename) classBody.generateClassCode(node) - self.code.emit('LOAD_CONST', classBody) - self.code.emit('MAKE_FUNCTION', 0) - self.code.emit('CALL_FUNCTION', 0) - self.code.emit('BUILD_CLASS') + self.emit('LOAD_CONST', classBody) + self.emit('MAKE_FUNCTION', 0) + self.emit('CALL_FUNCTION', 0) + self.emit('BUILD_CLASS') self.storeName(node.name) return 1 @@ -302,11 +298,11 @@ class CodeGenerator: """Code common to Function and Lambda nodes""" codeBody = CodeGenerator(self.filename) getattr(codeBody, 'generate%sCode' % kind)(node) - self.code.setLineNo(node.lineno) + self.emit('SET_LINENO', node.lineno) for default in node.defaults: self.visit(default) - self.code.emit('LOAD_CONST', codeBody) - self.code.emit('MAKE_FUNCTION', len(node.defaults)) + self.emit('LOAD_CONST', codeBody) + self.emit('MAKE_FUNCTION', len(node.defaults)) def visitFunction(self, node): self._visitFuncOrLambda(node, 'Function') @@ -323,7 +319,7 @@ class CodeGenerator: pos = 0 kw = 0 if hasattr(node, 'lineno'): - self.code.emit('SET_LINENO', node.lineno) + self.emit('SET_LINENO', node.lineno) self.visit(node.node) for arg in node.args: self.visit(arg) @@ -331,11 +327,11 @@ class CodeGenerator: kw = kw + 1 else: pos = pos + 1 - self.code.callFunction(kw << 8 | pos) + self.emit('CALL_FUNCTION', kw << 8 | pos) return 1 def visitKeyword(self, node): - self.code.emit('LOAD_CONST', node.name) + self.emit('LOAD_CONST', node.name) self.visit(node.expr) return 1 @@ -343,17 +339,17 @@ class CodeGenerator: after = StackRef() for test, suite in node.tests: if hasattr(test, 'lineno'): - self.code.setLineNo(test.lineno) + self.emit('SET_LINENO', test.lineno) else: print "warning", "no line number" self.visit(test) dest = StackRef() - self.code.jumpIfFalse(dest) - self.code.popTop() + self.emit('JUMP_IF_FALSE', dest) + self.emit('POP_TOP') self.visit(suite) - self.code.jumpForward(after) + self.emit('JUMP_FORWARD', after) dest.bind(self.code.getCurInst()) - self.code.popTop() + self.emit('POP_TOP') if node.else_: self.visit(node.else_) after.bind(self.code.getCurInst()) @@ -362,7 +358,7 @@ class CodeGenerator: def startLoop(self): l = Loop() self.loops.push(l) - self.code.emit('SETUP_LOOP', l.extentAnchor) + self.emit('SETUP_LOOP', l.extentAnchor) return l def finishLoop(self): @@ -374,42 +370,41 @@ class CodeGenerator: # three refs needed anchor = StackRef() - self.code.emit('SET_LINENO', node.lineno) + self.emit('SET_LINENO', node.lineno) l = self.startLoop() self.visit(node.list) self.visit(ast.Const(0)) l.startAnchor.bind(self.code.getCurInst()) - self.code.setLineNo(node.lineno) - self.code.emit('FOR_LOOP', anchor) - self.push(1) + self.emit('SET_LINENO', node.lineno) + self.emit('FOR_LOOP', anchor) self.visit(node.assign) self.visit(node.body) - self.code.emit('JUMP_ABSOLUTE', l.startAnchor) + self.emit('JUMP_ABSOLUTE', l.startAnchor) anchor.bind(self.code.getCurInst()) - self.code.emit('POP_BLOCK') + self.emit('POP_BLOCK') if node.else_: self.visit(node.else_) self.finishLoop() return 1 def visitWhile(self, node): - self.code.emit('SET_LINENO', node.lineno) + self.emit('SET_LINENO', node.lineno) l = self.startLoop() if node.else_: lElse = StackRef() else: lElse = l.breakAnchor l.startAnchor.bind(self.code.getCurInst()) - self.code.emit('SET_LINENO', node.test.lineno) + self.emit('SET_LINENO', node.test.lineno) self.visit(node.test) - self.code.emit('JUMP_IF_FALSE', lElse) - self.code.emit('POP_TOP') + self.emit('JUMP_IF_FALSE', lElse) + self.emit('POP_TOP') self.visit(node.body) - self.code.emit('JUMP_ABSOLUTE', l.startAnchor) + self.emit('JUMP_ABSOLUTE', l.startAnchor) # note that lElse may be an alias for l.breakAnchor lElse.bind(self.code.getCurInst()) - self.code.emit('POP_TOP') - self.code.emit('POP_BLOCK') + self.emit('POP_TOP') + self.emit('POP_BLOCK') if node.else_: self.visit(node.else_) self.finishLoop() @@ -418,16 +413,15 @@ class CodeGenerator: def visitBreak(self, node): if not self.loops: raise SyntaxError, "'break' outside loop" - self.code.emit('SET_LINENO', node.lineno) - self.code.emit('BREAK_LOOP') + self.emit('SET_LINENO', node.lineno) + self.emit('BREAK_LOOP') def visitContinue(self, node): if not self.loops: raise SyntaxError, "'continue' outside loop" l = self.loops.top() - self.code.emit('SET_LINENO', node.lineno) - self.code.emit('JUMP_ABSOLUTE', l.startAnchor) - + self.emit('SET_LINENO', node.lineno) + self.emit('JUMP_ABSOLUTE', l.startAnchor) def visitCompare(self, node): """Comment from compile.c follows: @@ -470,44 +464,42 @@ class CodeGenerator: for op, code in node.ops[:-1]: # emit every comparison except the last self.visit(code) - self.code.dupTop() - self.code.rotThree() - self.code.compareOp(op) + self.emit('DUP_TOP') + self.emit('ROT_THREE') + self.emit('COMPARE_OP', op) # dupTop and compareOp cancel stack effect - self.code.jumpIfFalse(l1) - self.code.popTop() - self.pop(1) + self.emit('JUMP_IF_FALSE', l1) + self.emit('POP_TOP') if node.ops: # emit the last comparison op, code = node.ops[-1] self.visit(code) - self.code.compareOp(op) - self.pop(1) + self.emit('COMPARE_OP', op) if len(node.ops) > 1: - self.code.jumpForward(l2) + self.emit('JUMP_FORWARD', l2) l1.bind(self.code.getCurInst()) - self.code.rotTwo() - self.code.popTop() - self.pop(1) + self.emit('ROT_TWO') + self.emit('POP_TOP') l2.bind(self.code.getCurInst()) return 1 def visitGetattr(self, node): self.visit(node.expr) - self.code.emit('LOAD_ATTR', node.attrname) - self.push(1) + self.emit('LOAD_ATTR', node.attrname) return 1 def visitSubscript(self, node): self.visit(node.expr) for sub in node.subs[:-1]: self.visit(sub) - self.code.emit('BINARY_SUBSCR') + self.emit('BINARY_SUBSCR') self.visit(node.subs[-1]) if node.flags == 'OP_APPLY': - self.code.emit('BINARY_SUBSCR') - else: - self.code.emit('STORE_SUBSCR') + self.emit('BINARY_SUBSCR') + elif node.flags == 'OP_ASSIGN': + self.emit('STORE_SUBSCR') + elif node.flags == 'OP_DELETE': + self.emit('DELETE_SUBSCR') return 1 @@ -517,26 +509,29 @@ class CodeGenerator: if node.lower: self.visit(node.lower) slice = slice | 1 - self.pop(1) if node.upper: self.visit(node.upper) slice = slice | 2 - self.pop(1) if node.flags == 'OP_APPLY': - self.code.emit('SLICE+%d' % slice) + self.emit('SLICE+%d' % slice) elif node.flags == 'OP_ASSIGN': - self.code.emit('STORE_SLICE+%d' % slice) + self.emit('STORE_SLICE+%d' % slice) elif node.flags == 'OP_DELETE': - self.code.emit('DELETE_SLICE+%d' % slice) + self.emit('DELETE_SLICE+%d' % slice) else: print node.flags raise return 1 def visitAssign(self, node): - self.code.setLineNo(node.lineno) + print "ASSIGN", node.expr + self.emit('SET_LINENO', node.lineno) self.visit(node.expr) - for elt in node.nodes: + dups = len(node.nodes) - 1 + for i in range(len(node.nodes)): + elt = node.nodes[i] + if i < dups: + self.emit('DUP_TOP') if isinstance(elt, ast.Node): self.visit(elt) return 1 @@ -545,18 +540,17 @@ class CodeGenerator: if node.flags != 'OP_ASSIGN': print "oops", node.flags self.storeName(node.name) - self.pop(1) def visitAssAttr(self, node): if node.flags != 'OP_ASSIGN': print "warning: unexpected flags:", node.flags print node self.visit(node.expr) - self.code.emit('STORE_ATTR', node.attrname) + self.emit('STORE_ATTR', node.attrname) return 1 def visitAssTuple(self, node): - self.code.emit('UNPACK_TUPLE', len(node.nodes)) + self.emit('UNPACK_TUPLE', len(node.nodes)) for child in node.nodes: self.visit(child) return 1 @@ -566,13 +560,12 @@ class CodeGenerator: def binaryOp(self, node, op): self.visit(node.left) self.visit(node.right) - self.code.emit(op) - self.pop(1) + self.emit(op) return 1 def unaryOp(self, node, op): self.visit(node.expr) - self.code.emit(op) + self.emit(op) return 1 def visitAdd(self, node): @@ -605,16 +598,50 @@ class CodeGenerator: def visitBackquote(self, node): return self.unaryOp(node, 'UNARY_CONVERT') + def bitOp(self, nodes, op): + self.visit(nodes[0]) + for node in nodes[1:]: + self.visit(node) + self.emit(op) + return 1 + + def visitBitand(self, node): + return self.bitOp(node.nodes, 'BINARY_AND') + + def visitBitor(self, node): + return self.bitOp(node.nodes, 'BINARY_OR') + + def visitBitxor(self, node): + return self.bitOp(node.nodes, 'BINARY_XOR') + def visitTest(self, node, jump): end = StackRef() for child in node.nodes[:-1]: self.visit(child) - self.code.emit(jump, end) - self.code.emit('POP_TOP') + self.emit(jump, end) + self.emit('POP_TOP') self.visit(node.nodes[-1]) end.bind(self.code.getCurInst()) return 1 + def visitAssert(self, node): + # XXX __debug__ and AssertionError appear to be special cases + # -- they are always loaded as globals even if there are local + # names. I guess this is a sort of renaming op. + skip = StackRef() + self.emit('SET_LINENO', node.lineno) + self.emit('LOAD_GLOBAL', '__debug__') + self.emit('JUMP_IF_FALSE', skip) + self.emit('POP_TOP') + self.visit(node.test) + self.emit('JUMP_IF_TRUE', skip) + self.emit('LOAD_GLOBAL', 'AssertionError') + self.visit(node.fail) + self.emit('RAISE_VARARGS', 2) + skip.bind(self.code.getCurInst()) + self.emit('POP_TOP') + return 1 + def visitAnd(self, node): return self.visitTest(node, 'JUMP_IF_FALSE') @@ -623,37 +650,46 @@ class CodeGenerator: def visitName(self, node): self.loadName(node.name) - self.push(1) def visitConst(self, node): - self.code.loadConst(node.value) - self.push(1) + self.emit('LOAD_CONST', node.value) return 1 + def visitEllipsis(self, node): + self.emit('LOAD_CONST', Ellipsis) + return 1 + def visitTuple(self, node): for elt in node.nodes: self.visit(elt) - self.code.emit('BUILD_TUPLE', len(node.nodes)) - self.pop(len(node.nodes)) + self.emit('BUILD_TUPLE', len(node.nodes)) return 1 def visitList(self, node): for elt in node.nodes: self.visit(elt) - self.code.emit('BUILD_LIST', len(node.nodes)) - self.pop(len(node.nodes)) + self.emit('BUILD_LIST', len(node.nodes)) return 1 + def visitDict(self, node): + self.emit('BUILD_MAP', 0) + for k, v in node.items: + # XXX need to add set lineno when there aren't constants + self.emit('DUP_TOP') + self.visit(v) + self.emit('ROT_TWO') + self.visit(k) + self.emit('STORE_SUBSCR') + return 1 + def visitReturn(self, node): - self.code.setLineNo(node.lineno) + self.emit('SET_LINENO', node.lineno) self.visit(node.value) - self.code.returnValue() - self.pop(1) - self.assertStackEmpty() + self.emit('RETURN_VALUE') return 1 def visitRaise(self, node): - self.code.setLineNo(node.lineno) + self.emit('SET_LINENO', node.lineno) n = 0 if node.expr1: self.visit(node.expr1) @@ -664,22 +700,33 @@ class CodeGenerator: if node.expr3: self.visit(node.expr3) n = n + 1 - self.code.raiseVarargs(n) + self.emit('RAISE_VARARGS', n) return 1 def visitPrint(self, node): - self.code.setLineNo(node.lineno) + self.emit('SET_LINENO', node.lineno) for child in node.nodes: self.visit(child) - self.code.emit('PRINT_ITEM') - self.pop(len(node.nodes)) + self.emit('PRINT_ITEM') return 1 def visitPrintnl(self, node): self.visitPrint(node) - self.code.emit('PRINT_NEWLINE') + self.emit('PRINT_NEWLINE') return 1 + def visitExec(self, node): + self.visit(node.expr) + if node.locals is None: + self.emit('LOAD_CONST', None) + else: + self.visit(node.locals) + if node.globals is None: + self.emit('DUP_TOP') + else: + self.visit(node.globals) + self.emit('EXEC_STMT') + class LocalNameFinder: def __init__(self, names=()): self.names = misc.Set() @@ -693,6 +740,9 @@ class LocalNameFinder: self.names.remove(elt) return self.names + def visitDict(self, node): + return 1 + def visitGlobal(self, node): for name in node.names: self.globals.add(name) @@ -726,406 +776,6 @@ class Loop: self.breakAnchor = StackRef() self.extentAnchor = StackRef() -class StackRef: - """Manage stack locations for jumps, loops, etc.""" - count = 0 - - def __init__(self, id=None, val=None): - if id is None: - id = StackRef.count - StackRef.count = StackRef.count + 1 - self.id = id - self.val = val - - def __repr__(self): - if self.val: - return "StackRef(val=%d)" % self.val - else: - return "StackRef(id=%d)" % self.id - - def bind(self, inst): - self.val = inst - - def resolve(self): - if self.val is None: - print "UNRESOLVE REF", self - return 0 - return self.val - -def add_hook(hooks, type, meth): - """Helper function for PythonVMCode _emit_hooks""" - l = hooks.get(type, []) - l.append(meth) - hooks[type] = l - -class PythonVMCode: - """Creates Python code objects - - The new module is used to create the code object. The following - attribute definitions are included from the reference manual: - - co_name gives the function name - co_argcount is the number of positional arguments (including - arguments with default values) - co_nlocals is the number of local variables used by the function - (including arguments) - co_varnames is a tuple containing the names of the local variables - (starting with the argument names) - co_code is a string representing the sequence of bytecode instructions - co_consts is a tuple containing the literals used by the bytecode - co_names is a tuple containing the names used by the bytecode - co_filename is the filename from which the code was compiled - co_firstlineno is the first line number of the function - co_lnotab is a string encoding the mapping from byte code offsets - to line numbers (for detais see the source code of the - interpreter) - see code com_set_lineno and com_add_lnotab - it's a string with 2bytes per set_lineno - - co_stacksize is the required stack size (including local variables) - co_flags is an integer encoding a number of flags for the - interpreter. - - The following flag bits are defined for co_flags: bit 2 is set if - the function uses the "*arguments" syntax to accept an arbitrary - number of positional arguments; bit 3 is set if the function uses - the "**keywords" syntax to accept arbitrary keyword arguments; - other bits are used internally or reserved for future use. - - If a code object represents a function, the first item in - co_consts is the documentation string of the function, or None if - undefined. - """ - - # XXX flag bits - CO_OPTIMIZED = 0x0001 # uses LOAD_FAST! - CO_NEWLOCALS = 0x0002 # everybody uses this? - CO_VARARGS = 0x0004 - CO_VARKEYWORDS = 0x0008 - - def __init__(self, args=(), name='?', filename='', - docstring=None): - # XXX why is the default value for flags 3? - self.insts = [] - # used by makeCodeObject - self.argcount = len(args) - self.code = '' - self.consts = [docstring] - self.filename = filename - self.flags = self.CO_NEWLOCALS - self.name = name - self.names = [] - self.varnames = list(args) or [] - # lnotab support - self.firstlineno = 0 - self.lastlineno = 0 - self.last_addr = 0 - self.lnotab = '' - - def __repr__(self): - return "" % len(self.insts) - - def setFlags(self, val): - """XXX for module's function""" - self.flags = val - - def setOptimized(self): - self.flags = self.flags | self.CO_OPTIMIZED - - def setVarArgs(self): - self.flags = self.flags | self.CO_VARARGS - - def setKWArgs(self): - self.flags = self.flags | self.CO_VARKEYWORDS - - def getCurInst(self): - return len(self.insts) - - def getNextInst(self): - return len(self.insts) + 1 - - def dump(self, io=sys.stdout): - i = 0 - for inst in self.insts: - if inst[0] == 'SET_LINENO': - io.write("\n") - io.write(" %3d " % i) - if len(inst) == 1: - io.write("%s\n" % inst) - else: - io.write("%-15.15s\t%s\n" % inst) - i = i + 1 - - def makeCodeObject(self, stacksize): - """Make a Python code object - - This creates a Python code object using the new module. This - seems simpler than reverse-engineering the way marshal dumps - code objects into .pyc files. One of the key difficulties is - figuring out how to layout references to code objects that - appear on the VM stack; e.g. - 3 SET_LINENO 1 - 6 LOAD_CONST 0 ( 0 or line > 0: - # write the values in 1-byte chunks that sum - # to desired value - trunc_addr = addr - trunc_line = line - if trunc_addr > 255: - trunc_addr = 255 - if trunc_line > 255: - trunc_line = 255 - self.lnotab.append(trunc_addr) - self.lnotab.append(trunc_line) - addr = addr - trunc_addr - line = line - trunc_line - self.lastline = lineno - self.lastoff = self.codeOffset - - def getCode(self): - return string.join(self.code, '') - - def getTable(self): - return string.join(map(chr, self.lnotab), '') - class CompiledModule: """Store the code object for a compiled module @@ -1145,7 +795,7 @@ class CompiledModule: self.ast = t.parsesuite(self.source) cg = CodeGenerator(self.filename) walk(self.ast, cg, walker=ExampleASTVisitor) - self.code = cg.emit() + self.code = cg.asConst() def dump(self, path): """create a .pyc file""" diff --git a/Tools/compiler/compiler/pyassem.py b/Tools/compiler/compiler/pyassem.py new file mode 100644 index 0000000..047836b --- /dev/null +++ b/Tools/compiler/compiler/pyassem.py @@ -0,0 +1,437 @@ +"""Assembler for Python bytecode + +The new module is used to create the code object. The following +attribute definitions are included from the reference manual: + +co_name gives the function name +co_argcount is the number of positional arguments (including + arguments with default values) +co_nlocals is the number of local variables used by the function + (including arguments) +co_varnames is a tuple containing the names of the local variables + (starting with the argument names) +co_code is a string representing the sequence of bytecode instructions +co_consts is a tuple containing the literals used by the bytecode +co_names is a tuple containing the names used by the bytecode +co_filename is the filename from which the code was compiled +co_firstlineno is the first line number of the function +co_lnotab is a string encoding the mapping from byte code offsets + to line numbers. see LineAddrTable below. +co_stacksize is the required stack size (including local variables) +co_flags is an integer encoding a number of flags for the + interpreter. There are four flags: + CO_OPTIMIZED -- uses load fast + CO_NEWLOCALS -- everything? + CO_VARARGS -- use *args + CO_VARKEYWORDS -- uses **args + +If a code object represents a function, the first item in co_consts is +the documentation string of the function, or None if undefined. +""" + +import sys +import dis +import new +import string + +import misc + +# flags for code objects +CO_OPTIMIZED = 0x0001 +CO_NEWLOCALS = 0x0002 +CO_VARARGS = 0x0004 +CO_VARKEYWORDS = 0x0008 + +class PyAssembler: + """Creates Python code objects + """ + + # XXX this class needs to major refactoring + + def __init__(self, args=(), name='?', filename='', + docstring=None): + # XXX why is the default value for flags 3? + self.insts = [] + # used by makeCodeObject + self.argcount = len(args) + self.code = '' + self.consts = [docstring] + self.filename = filename + self.flags = CO_NEWLOCALS + self.name = name + self.names = [] + self.varnames = list(args) or [] + # lnotab support + self.firstlineno = 0 + self.lastlineno = 0 + self.last_addr = 0 + self.lnotab = '' + + def __repr__(self): + return "" % len(self.insts) + + def setFlags(self, val): + """XXX for module's function""" + self.flags = val + + def setOptimized(self): + self.flags = self.flags | CO_OPTIMIZED + + def setVarArgs(self): + self.flags = self.flags | CO_VARARGS + + def setKWArgs(self): + self.flags = self.flags | CO_VARKEYWORDS + + def getCurInst(self): + return len(self.insts) + + def getNextInst(self): + return len(self.insts) + 1 + + def dump(self, io=sys.stdout): + i = 0 + for inst in self.insts: + if inst[0] == 'SET_LINENO': + io.write("\n") + io.write(" %3d " % i) + if len(inst) == 1: + io.write("%s\n" % inst) + else: + io.write("%-15.15s\t%s\n" % inst) + i = i + 1 + + def makeCodeObject(self): + """Make a Python code object + + This creates a Python code object using the new module. This + seems simpler than reverse-engineering the way marshal dumps + code objects into .pyc files. One of the key difficulties is + figuring out how to layout references to code objects that + appear on the VM stack; e.g. + 3 SET_LINENO 1 + 6 LOAD_CONST 0 ( 0 or line > 0: + # write the values in 1-byte chunks that sum + # to desired value + trunc_addr = addr + trunc_line = line + if trunc_addr > 255: + trunc_addr = 255 + if trunc_line > 255: + trunc_line = 255 + self.lnotab.append(trunc_addr) + self.lnotab.append(trunc_line) + addr = addr - trunc_addr + line = line - trunc_line + self.lastline = lineno + self.lastoff = self.codeOffset + + def getCode(self): + return string.join(self.code, '') + + def getTable(self): + return string.join(map(chr, self.lnotab), '') + +class StackRef: + """Manage stack locations for jumps, loops, etc.""" + count = 0 + + def __init__(self, id=None, val=None): + if id is None: + id = StackRef.count + StackRef.count = StackRef.count + 1 + self.id = id + self.val = val + + def __repr__(self): + if self.val: + return "StackRef(val=%d)" % self.val + else: + return "StackRef(id=%d)" % self.id + + def bind(self, inst): + self.val = inst + + def resolve(self): + if self.val is None: + print "UNRESOLVE REF", self + return 0 + return self.val + +class StackDepthTracker: + # XXX need to keep track of stack depth on jumps + + def findDepth(self, insts): + depth = 0 + maxDepth = 0 + for i in insts: + opname = i[0] + delta = self.effect.get(opname, 0) + if delta > 1: + depth = depth + delta + elif delta < 0: + if depth > maxDepth: + maxDepth = depth + depth = depth + delta + else: + if depth > maxDepth: + maxDepth = depth + # now check patterns + for pat, delta in self.patterns: + if opname[:len(pat)] == pat: + depth = depth + delta + break + # if we still haven't found a match + if delta == 0: + meth = getattr(self, opname) + depth = depth + meth(i[1]) + if depth < 0: + depth = 0 + return maxDepth + + effect = { + 'POP_TOP': -1, + 'DUP_TOP': 1, + 'SLICE+1': -1, + 'SLICE+2': -1, + 'SLICE+3': -2, + 'STORE_SLICE+0': -1, + 'STORE_SLICE+1': -2, + 'STORE_SLICE+2': -2, + 'STORE_SLICE+3': -3, + 'DELETE_SLICE+0': -1, + 'DELETE_SLICE+1': -2, + 'DELETE_SLICE+2': -2, + 'DELETE_SLICE+3': -3, + 'STORE_SUBSCR': -3, + 'DELETE_SUBSCR': -2, + # PRINT_EXPR? + 'PRINT_ITEM': -1, + 'LOAD_LOCALS': 1, + 'RETURN_VALUE': -1, + 'EXEC_STMT': -2, + 'BUILD_CLASS': -2, + 'STORE_NAME': -1, + 'STORE_ATTR': -2, + 'DELETE_ATTR': -1, + 'STORE_GLOBAL': -1, + 'BUILD_MAP': 1, + 'COMPARE_OP': -1, + 'STORE_FAST': -1, + } + # use pattern match + patterns = [ + ('BINARY_', -1), + ('LOAD_', 1), + ('IMPORT_', 1), + ] + # special cases + + #: UNPACK_TUPLE, UNPACK_LIST, BUILD_TUPLE, + # BUILD_LIST, CALL_FUNCTION, MAKE_FUNCTION, BUILD_SLICE + def UNPACK_TUPLE(self, count): + return count + def UNPACK_LIST(self, count): + return count + def BUILD_TUPLE(self, count): + return -count + def BUILD_LIST(self, count): + return -count + def CALL_FUNCTION(self, argc): + hi, lo = divmod(argc, 256) + return lo + hi * 2 + def MAKE_FUNCTION(self, argc): + return -argc + def BUILD_SLICE(self, argc): + if argc == 2: + return -1 + elif argc == 3: + return -2 + +findDepth = StackDepthTracker().findDepth diff --git a/Tools/compiler/compiler/pycodegen.py b/Tools/compiler/compiler/pycodegen.py index 8599e6b..ac93348 100644 --- a/Tools/compiler/compiler/pycodegen.py +++ b/Tools/compiler/compiler/pycodegen.py @@ -6,6 +6,7 @@ a generic tool and CodeGenerator as a specific tool. """ from p2c import transformer, ast +from pyassem import StackRef, PyAssembler import dis import misc import marshal @@ -149,14 +150,25 @@ class ExampleASTVisitor(ASTVisitor): print class CodeGenerator: - # XXX this should be combined with PythonVMCode. there is no - # clear way to split the functionality into two classes. + """TODO + + EmptyNode + Exec + Invert + LeftShift + Power + RightShift + Sliceobj + Tryexcept + Tryfinally + """ OPTIMIZED = 1 + # XXX should clean up initialization and generateXXX funcs def __init__(self, filename=""): self.filename = filename - self.code = PythonVMCode() + self.code = PyAssembler() self.code.setFlags(0) self.locals = misc.Stack() self.loops = misc.Stack() @@ -164,11 +176,15 @@ class CodeGenerator: self.curStack = 0 self.maxStack = 0 + def emit(self, *args): + # XXX could just use self.emit = self.code.emit + apply(self.code.emit, args) + def _generateFunctionOrLambdaCode(self, func): self.name = func.name self.filename = filename args = func.argnames - self.code = PythonVMCode(args=args, name=func.name, + self.code = PyAssembler(args=args, name=func.name, filename=filename) self.namespace = self.OPTIMIZED if func.varargs: @@ -177,38 +193,34 @@ class CodeGenerator: self.code.setKWArgs() lnf = walk(func.code, LocalNameFinder(args), 0) self.locals.push(lnf.getLocals()) - self.code.setLineNo(func.lineno) + self.emit('SET_LINENO', func.lineno) walk(func.code, self) def generateFunctionCode(self, func): """Generate code for a function body""" self._generateFunctionOrLambdaCode(func) - self.code.emit('LOAD_CONST', None) - self.code.emit('RETURN_VALUE') + self.emit('LOAD_CONST', None) + self.emit('RETURN_VALUE') def generateLambdaCode(self, func): self._generateFunctionOrLambdaCode(func) - self.code.emit('RETURN_VALUE') + self.emit('RETURN_VALUE') def generateClassCode(self, klass): - self.code = PythonVMCode(name=klass.name, + self.code = PyAssembler(name=klass.name, filename=filename) - self.code.setLineNo(klass.lineno) + self.emit('SET_LINENO', klass.lineno) lnf = walk(klass.code, LocalNameFinder(), 0) self.locals.push(lnf.getLocals()) walk(klass.code, self) - self.code.emit('LOAD_LOCALS') - self.code.emit('RETURN_VALUE') + self.emit('LOAD_LOCALS') + self.emit('RETURN_VALUE') - def emit(self): - """Create a Python code object - - XXX It is confusing that this method isn't related to the - method named emit in the PythonVMCode. - """ + def asConst(self): + """Create a Python code object.""" if self.namespace == self.OPTIMIZED: self.code.setOptimized() - return self.code.makeCodeObject(self.maxStack) + return self.code.makeCodeObject() def isLocalName(self, name): return self.locals.top().has_elt(name) @@ -216,11 +228,11 @@ class CodeGenerator: def _nameOp(self, prefix, name): if self.isLocalName(name): if self.namespace == self.OPTIMIZED: - self.code.emit(prefix + '_FAST', name) + self.emit(prefix + '_FAST', name) else: - self.code.emit(prefix + '_NAME', name) + self.emit(prefix + '_NAME', name) else: - self.code.emit(prefix + '_GLOBAL', name) + self.emit(prefix + '_GLOBAL', name) def storeName(self, name): self._nameOp('STORE', name) @@ -231,21 +243,6 @@ class CodeGenerator: def delName(self, name): self._nameOp('DELETE', name) - def push(self, n): - self.curStack = self.curStack + n - if self.curStack > self.maxStack: - self.maxStack = self.curStack - - def pop(self, n): - if n >= self.curStack: - self.curStack = self.curStack - n - else: - self.curStack = 0 - - def assertStackEmpty(self): - if self.curStack != 0: - print "warning: stack should be empty" - def visitNULL(self, node): """Method exists only to stop warning in -v mode""" pass @@ -255,46 +252,45 @@ class CodeGenerator: def visitDiscard(self, node): self.visit(node.expr) - self.code.emit('POP_TOP') - self.pop(1) + self.emit('POP_TOP') return 1 def visitPass(self, node): - self.code.setLineNo(node.lineno) + self.emit('SET_LINENO', node.lineno) def visitModule(self, node): lnf = walk(node.node, LocalNameFinder(), 0) self.locals.push(lnf.getLocals()) self.visit(node.node) - self.code.emit('LOAD_CONST', None) - self.code.emit('RETURN_VALUE') + self.emit('LOAD_CONST', None) + self.emit('RETURN_VALUE') return 1 def visitImport(self, node): - self.code.setLineNo(node.lineno) + self.emit('SET_LINENO', node.lineno) for name in node.names: - self.code.emit('IMPORT_NAME', name) + self.emit('IMPORT_NAME', name) self.storeName(name) def visitFrom(self, node): - self.code.setLineNo(node.lineno) - self.code.emit('IMPORT_NAME', node.modname) + self.emit('SET_LINENO', node.lineno) + self.emit('IMPORT_NAME', node.modname) for name in node.names: - self.code.emit('IMPORT_FROM', name) - self.code.emit('POP_TOP') + self.emit('IMPORT_FROM', name) + self.emit('POP_TOP') def visitClassdef(self, node): - self.code.emit('SET_LINENO', node.lineno) - self.code.emit('LOAD_CONST', node.name) + self.emit('SET_LINENO', node.lineno) + self.emit('LOAD_CONST', node.name) for base in node.bases: self.visit(base) - self.code.emit('BUILD_TUPLE', len(node.bases)) + self.emit('BUILD_TUPLE', len(node.bases)) classBody = CodeGenerator(self.filename) classBody.generateClassCode(node) - self.code.emit('LOAD_CONST', classBody) - self.code.emit('MAKE_FUNCTION', 0) - self.code.emit('CALL_FUNCTION', 0) - self.code.emit('BUILD_CLASS') + self.emit('LOAD_CONST', classBody) + self.emit('MAKE_FUNCTION', 0) + self.emit('CALL_FUNCTION', 0) + self.emit('BUILD_CLASS') self.storeName(node.name) return 1 @@ -302,11 +298,11 @@ class CodeGenerator: """Code common to Function and Lambda nodes""" codeBody = CodeGenerator(self.filename) getattr(codeBody, 'generate%sCode' % kind)(node) - self.code.setLineNo(node.lineno) + self.emit('SET_LINENO', node.lineno) for default in node.defaults: self.visit(default) - self.code.emit('LOAD_CONST', codeBody) - self.code.emit('MAKE_FUNCTION', len(node.defaults)) + self.emit('LOAD_CONST', codeBody) + self.emit('MAKE_FUNCTION', len(node.defaults)) def visitFunction(self, node): self._visitFuncOrLambda(node, 'Function') @@ -323,7 +319,7 @@ class CodeGenerator: pos = 0 kw = 0 if hasattr(node, 'lineno'): - self.code.emit('SET_LINENO', node.lineno) + self.emit('SET_LINENO', node.lineno) self.visit(node.node) for arg in node.args: self.visit(arg) @@ -331,11 +327,11 @@ class CodeGenerator: kw = kw + 1 else: pos = pos + 1 - self.code.callFunction(kw << 8 | pos) + self.emit('CALL_FUNCTION', kw << 8 | pos) return 1 def visitKeyword(self, node): - self.code.emit('LOAD_CONST', node.name) + self.emit('LOAD_CONST', node.name) self.visit(node.expr) return 1 @@ -343,17 +339,17 @@ class CodeGenerator: after = StackRef() for test, suite in node.tests: if hasattr(test, 'lineno'): - self.code.setLineNo(test.lineno) + self.emit('SET_LINENO', test.lineno) else: print "warning", "no line number" self.visit(test) dest = StackRef() - self.code.jumpIfFalse(dest) - self.code.popTop() + self.emit('JUMP_IF_FALSE', dest) + self.emit('POP_TOP') self.visit(suite) - self.code.jumpForward(after) + self.emit('JUMP_FORWARD', after) dest.bind(self.code.getCurInst()) - self.code.popTop() + self.emit('POP_TOP') if node.else_: self.visit(node.else_) after.bind(self.code.getCurInst()) @@ -362,7 +358,7 @@ class CodeGenerator: def startLoop(self): l = Loop() self.loops.push(l) - self.code.emit('SETUP_LOOP', l.extentAnchor) + self.emit('SETUP_LOOP', l.extentAnchor) return l def finishLoop(self): @@ -374,42 +370,41 @@ class CodeGenerator: # three refs needed anchor = StackRef() - self.code.emit('SET_LINENO', node.lineno) + self.emit('SET_LINENO', node.lineno) l = self.startLoop() self.visit(node.list) self.visit(ast.Const(0)) l.startAnchor.bind(self.code.getCurInst()) - self.code.setLineNo(node.lineno) - self.code.emit('FOR_LOOP', anchor) - self.push(1) + self.emit('SET_LINENO', node.lineno) + self.emit('FOR_LOOP', anchor) self.visit(node.assign) self.visit(node.body) - self.code.emit('JUMP_ABSOLUTE', l.startAnchor) + self.emit('JUMP_ABSOLUTE', l.startAnchor) anchor.bind(self.code.getCurInst()) - self.code.emit('POP_BLOCK') + self.emit('POP_BLOCK') if node.else_: self.visit(node.else_) self.finishLoop() return 1 def visitWhile(self, node): - self.code.emit('SET_LINENO', node.lineno) + self.emit('SET_LINENO', node.lineno) l = self.startLoop() if node.else_: lElse = StackRef() else: lElse = l.breakAnchor l.startAnchor.bind(self.code.getCurInst()) - self.code.emit('SET_LINENO', node.test.lineno) + self.emit('SET_LINENO', node.test.lineno) self.visit(node.test) - self.code.emit('JUMP_IF_FALSE', lElse) - self.code.emit('POP_TOP') + self.emit('JUMP_IF_FALSE', lElse) + self.emit('POP_TOP') self.visit(node.body) - self.code.emit('JUMP_ABSOLUTE', l.startAnchor) + self.emit('JUMP_ABSOLUTE', l.startAnchor) # note that lElse may be an alias for l.breakAnchor lElse.bind(self.code.getCurInst()) - self.code.emit('POP_TOP') - self.code.emit('POP_BLOCK') + self.emit('POP_TOP') + self.emit('POP_BLOCK') if node.else_: self.visit(node.else_) self.finishLoop() @@ -418,16 +413,15 @@ class CodeGenerator: def visitBreak(self, node): if not self.loops: raise SyntaxError, "'break' outside loop" - self.code.emit('SET_LINENO', node.lineno) - self.code.emit('BREAK_LOOP') + self.emit('SET_LINENO', node.lineno) + self.emit('BREAK_LOOP') def visitContinue(self, node): if not self.loops: raise SyntaxError, "'continue' outside loop" l = self.loops.top() - self.code.emit('SET_LINENO', node.lineno) - self.code.emit('JUMP_ABSOLUTE', l.startAnchor) - + self.emit('SET_LINENO', node.lineno) + self.emit('JUMP_ABSOLUTE', l.startAnchor) def visitCompare(self, node): """Comment from compile.c follows: @@ -470,44 +464,42 @@ class CodeGenerator: for op, code in node.ops[:-1]: # emit every comparison except the last self.visit(code) - self.code.dupTop() - self.code.rotThree() - self.code.compareOp(op) + self.emit('DUP_TOP') + self.emit('ROT_THREE') + self.emit('COMPARE_OP', op) # dupTop and compareOp cancel stack effect - self.code.jumpIfFalse(l1) - self.code.popTop() - self.pop(1) + self.emit('JUMP_IF_FALSE', l1) + self.emit('POP_TOP') if node.ops: # emit the last comparison op, code = node.ops[-1] self.visit(code) - self.code.compareOp(op) - self.pop(1) + self.emit('COMPARE_OP', op) if len(node.ops) > 1: - self.code.jumpForward(l2) + self.emit('JUMP_FORWARD', l2) l1.bind(self.code.getCurInst()) - self.code.rotTwo() - self.code.popTop() - self.pop(1) + self.emit('ROT_TWO') + self.emit('POP_TOP') l2.bind(self.code.getCurInst()) return 1 def visitGetattr(self, node): self.visit(node.expr) - self.code.emit('LOAD_ATTR', node.attrname) - self.push(1) + self.emit('LOAD_ATTR', node.attrname) return 1 def visitSubscript(self, node): self.visit(node.expr) for sub in node.subs[:-1]: self.visit(sub) - self.code.emit('BINARY_SUBSCR') + self.emit('BINARY_SUBSCR') self.visit(node.subs[-1]) if node.flags == 'OP_APPLY': - self.code.emit('BINARY_SUBSCR') - else: - self.code.emit('STORE_SUBSCR') + self.emit('BINARY_SUBSCR') + elif node.flags == 'OP_ASSIGN': + self.emit('STORE_SUBSCR') + elif node.flags == 'OP_DELETE': + self.emit('DELETE_SUBSCR') return 1 @@ -517,26 +509,29 @@ class CodeGenerator: if node.lower: self.visit(node.lower) slice = slice | 1 - self.pop(1) if node.upper: self.visit(node.upper) slice = slice | 2 - self.pop(1) if node.flags == 'OP_APPLY': - self.code.emit('SLICE+%d' % slice) + self.emit('SLICE+%d' % slice) elif node.flags == 'OP_ASSIGN': - self.code.emit('STORE_SLICE+%d' % slice) + self.emit('STORE_SLICE+%d' % slice) elif node.flags == 'OP_DELETE': - self.code.emit('DELETE_SLICE+%d' % slice) + self.emit('DELETE_SLICE+%d' % slice) else: print node.flags raise return 1 def visitAssign(self, node): - self.code.setLineNo(node.lineno) + print "ASSIGN", node.expr + self.emit('SET_LINENO', node.lineno) self.visit(node.expr) - for elt in node.nodes: + dups = len(node.nodes) - 1 + for i in range(len(node.nodes)): + elt = node.nodes[i] + if i < dups: + self.emit('DUP_TOP') if isinstance(elt, ast.Node): self.visit(elt) return 1 @@ -545,18 +540,17 @@ class CodeGenerator: if node.flags != 'OP_ASSIGN': print "oops", node.flags self.storeName(node.name) - self.pop(1) def visitAssAttr(self, node): if node.flags != 'OP_ASSIGN': print "warning: unexpected flags:", node.flags print node self.visit(node.expr) - self.code.emit('STORE_ATTR', node.attrname) + self.emit('STORE_ATTR', node.attrname) return 1 def visitAssTuple(self, node): - self.code.emit('UNPACK_TUPLE', len(node.nodes)) + self.emit('UNPACK_TUPLE', len(node.nodes)) for child in node.nodes: self.visit(child) return 1 @@ -566,13 +560,12 @@ class CodeGenerator: def binaryOp(self, node, op): self.visit(node.left) self.visit(node.right) - self.code.emit(op) - self.pop(1) + self.emit(op) return 1 def unaryOp(self, node, op): self.visit(node.expr) - self.code.emit(op) + self.emit(op) return 1 def visitAdd(self, node): @@ -605,16 +598,50 @@ class CodeGenerator: def visitBackquote(self, node): return self.unaryOp(node, 'UNARY_CONVERT') + def bitOp(self, nodes, op): + self.visit(nodes[0]) + for node in nodes[1:]: + self.visit(node) + self.emit(op) + return 1 + + def visitBitand(self, node): + return self.bitOp(node.nodes, 'BINARY_AND') + + def visitBitor(self, node): + return self.bitOp(node.nodes, 'BINARY_OR') + + def visitBitxor(self, node): + return self.bitOp(node.nodes, 'BINARY_XOR') + def visitTest(self, node, jump): end = StackRef() for child in node.nodes[:-1]: self.visit(child) - self.code.emit(jump, end) - self.code.emit('POP_TOP') + self.emit(jump, end) + self.emit('POP_TOP') self.visit(node.nodes[-1]) end.bind(self.code.getCurInst()) return 1 + def visitAssert(self, node): + # XXX __debug__ and AssertionError appear to be special cases + # -- they are always loaded as globals even if there are local + # names. I guess this is a sort of renaming op. + skip = StackRef() + self.emit('SET_LINENO', node.lineno) + self.emit('LOAD_GLOBAL', '__debug__') + self.emit('JUMP_IF_FALSE', skip) + self.emit('POP_TOP') + self.visit(node.test) + self.emit('JUMP_IF_TRUE', skip) + self.emit('LOAD_GLOBAL', 'AssertionError') + self.visit(node.fail) + self.emit('RAISE_VARARGS', 2) + skip.bind(self.code.getCurInst()) + self.emit('POP_TOP') + return 1 + def visitAnd(self, node): return self.visitTest(node, 'JUMP_IF_FALSE') @@ -623,37 +650,46 @@ class CodeGenerator: def visitName(self, node): self.loadName(node.name) - self.push(1) def visitConst(self, node): - self.code.loadConst(node.value) - self.push(1) + self.emit('LOAD_CONST', node.value) return 1 + def visitEllipsis(self, node): + self.emit('LOAD_CONST', Ellipsis) + return 1 + def visitTuple(self, node): for elt in node.nodes: self.visit(elt) - self.code.emit('BUILD_TUPLE', len(node.nodes)) - self.pop(len(node.nodes)) + self.emit('BUILD_TUPLE', len(node.nodes)) return 1 def visitList(self, node): for elt in node.nodes: self.visit(elt) - self.code.emit('BUILD_LIST', len(node.nodes)) - self.pop(len(node.nodes)) + self.emit('BUILD_LIST', len(node.nodes)) return 1 + def visitDict(self, node): + self.emit('BUILD_MAP', 0) + for k, v in node.items: + # XXX need to add set lineno when there aren't constants + self.emit('DUP_TOP') + self.visit(v) + self.emit('ROT_TWO') + self.visit(k) + self.emit('STORE_SUBSCR') + return 1 + def visitReturn(self, node): - self.code.setLineNo(node.lineno) + self.emit('SET_LINENO', node.lineno) self.visit(node.value) - self.code.returnValue() - self.pop(1) - self.assertStackEmpty() + self.emit('RETURN_VALUE') return 1 def visitRaise(self, node): - self.code.setLineNo(node.lineno) + self.emit('SET_LINENO', node.lineno) n = 0 if node.expr1: self.visit(node.expr1) @@ -664,22 +700,33 @@ class CodeGenerator: if node.expr3: self.visit(node.expr3) n = n + 1 - self.code.raiseVarargs(n) + self.emit('RAISE_VARARGS', n) return 1 def visitPrint(self, node): - self.code.setLineNo(node.lineno) + self.emit('SET_LINENO', node.lineno) for child in node.nodes: self.visit(child) - self.code.emit('PRINT_ITEM') - self.pop(len(node.nodes)) + self.emit('PRINT_ITEM') return 1 def visitPrintnl(self, node): self.visitPrint(node) - self.code.emit('PRINT_NEWLINE') + self.emit('PRINT_NEWLINE') return 1 + def visitExec(self, node): + self.visit(node.expr) + if node.locals is None: + self.emit('LOAD_CONST', None) + else: + self.visit(node.locals) + if node.globals is None: + self.emit('DUP_TOP') + else: + self.visit(node.globals) + self.emit('EXEC_STMT') + class LocalNameFinder: def __init__(self, names=()): self.names = misc.Set() @@ -693,6 +740,9 @@ class LocalNameFinder: self.names.remove(elt) return self.names + def visitDict(self, node): + return 1 + def visitGlobal(self, node): for name in node.names: self.globals.add(name) @@ -726,406 +776,6 @@ class Loop: self.breakAnchor = StackRef() self.extentAnchor = StackRef() -class StackRef: - """Manage stack locations for jumps, loops, etc.""" - count = 0 - - def __init__(self, id=None, val=None): - if id is None: - id = StackRef.count - StackRef.count = StackRef.count + 1 - self.id = id - self.val = val - - def __repr__(self): - if self.val: - return "StackRef(val=%d)" % self.val - else: - return "StackRef(id=%d)" % self.id - - def bind(self, inst): - self.val = inst - - def resolve(self): - if self.val is None: - print "UNRESOLVE REF", self - return 0 - return self.val - -def add_hook(hooks, type, meth): - """Helper function for PythonVMCode _emit_hooks""" - l = hooks.get(type, []) - l.append(meth) - hooks[type] = l - -class PythonVMCode: - """Creates Python code objects - - The new module is used to create the code object. The following - attribute definitions are included from the reference manual: - - co_name gives the function name - co_argcount is the number of positional arguments (including - arguments with default values) - co_nlocals is the number of local variables used by the function - (including arguments) - co_varnames is a tuple containing the names of the local variables - (starting with the argument names) - co_code is a string representing the sequence of bytecode instructions - co_consts is a tuple containing the literals used by the bytecode - co_names is a tuple containing the names used by the bytecode - co_filename is the filename from which the code was compiled - co_firstlineno is the first line number of the function - co_lnotab is a string encoding the mapping from byte code offsets - to line numbers (for detais see the source code of the - interpreter) - see code com_set_lineno and com_add_lnotab - it's a string with 2bytes per set_lineno - - co_stacksize is the required stack size (including local variables) - co_flags is an integer encoding a number of flags for the - interpreter. - - The following flag bits are defined for co_flags: bit 2 is set if - the function uses the "*arguments" syntax to accept an arbitrary - number of positional arguments; bit 3 is set if the function uses - the "**keywords" syntax to accept arbitrary keyword arguments; - other bits are used internally or reserved for future use. - - If a code object represents a function, the first item in - co_consts is the documentation string of the function, or None if - undefined. - """ - - # XXX flag bits - CO_OPTIMIZED = 0x0001 # uses LOAD_FAST! - CO_NEWLOCALS = 0x0002 # everybody uses this? - CO_VARARGS = 0x0004 - CO_VARKEYWORDS = 0x0008 - - def __init__(self, args=(), name='?', filename='', - docstring=None): - # XXX why is the default value for flags 3? - self.insts = [] - # used by makeCodeObject - self.argcount = len(args) - self.code = '' - self.consts = [docstring] - self.filename = filename - self.flags = self.CO_NEWLOCALS - self.name = name - self.names = [] - self.varnames = list(args) or [] - # lnotab support - self.firstlineno = 0 - self.lastlineno = 0 - self.last_addr = 0 - self.lnotab = '' - - def __repr__(self): - return "" % len(self.insts) - - def setFlags(self, val): - """XXX for module's function""" - self.flags = val - - def setOptimized(self): - self.flags = self.flags | self.CO_OPTIMIZED - - def setVarArgs(self): - self.flags = self.flags | self.CO_VARARGS - - def setKWArgs(self): - self.flags = self.flags | self.CO_VARKEYWORDS - - def getCurInst(self): - return len(self.insts) - - def getNextInst(self): - return len(self.insts) + 1 - - def dump(self, io=sys.stdout): - i = 0 - for inst in self.insts: - if inst[0] == 'SET_LINENO': - io.write("\n") - io.write(" %3d " % i) - if len(inst) == 1: - io.write("%s\n" % inst) - else: - io.write("%-15.15s\t%s\n" % inst) - i = i + 1 - - def makeCodeObject(self, stacksize): - """Make a Python code object - - This creates a Python code object using the new module. This - seems simpler than reverse-engineering the way marshal dumps - code objects into .pyc files. One of the key difficulties is - figuring out how to layout references to code objects that - appear on the VM stack; e.g. - 3 SET_LINENO 1 - 6 LOAD_CONST 0 ( 0 or line > 0: - # write the values in 1-byte chunks that sum - # to desired value - trunc_addr = addr - trunc_line = line - if trunc_addr > 255: - trunc_addr = 255 - if trunc_line > 255: - trunc_line = 255 - self.lnotab.append(trunc_addr) - self.lnotab.append(trunc_line) - addr = addr - trunc_addr - line = line - trunc_line - self.lastline = lineno - self.lastoff = self.codeOffset - - def getCode(self): - return string.join(self.code, '') - - def getTable(self): - return string.join(map(chr, self.lnotab), '') - class CompiledModule: """Store the code object for a compiled module @@ -1145,7 +795,7 @@ class CompiledModule: self.ast = t.parsesuite(self.source) cg = CodeGenerator(self.filename) walk(self.ast, cg, walker=ExampleASTVisitor) - self.code = cg.emit() + self.code = cg.asConst() def dump(self, path): """create a .pyc file""" -- cgit v0.12