diff options
author | Jeremy Hylton <jeremy@alum.mit.edu> | 2000-02-14 14:14:29 (GMT) |
---|---|---|
committer | Jeremy Hylton <jeremy@alum.mit.edu> | 2000-02-14 14:14:29 (GMT) |
commit | a50581228ebf9e9f1008022bdef735fb4c722705 (patch) | |
tree | ed73f998a9ee134471ffb53aa49a51f391d6170a /Lib/compiler/pyassem.py | |
parent | 3050d515714ce7d3a52bcb159bea3e9300008cd3 (diff) | |
download | cpython-a50581228ebf9e9f1008022bdef735fb4c722705.zip cpython-a50581228ebf9e9f1008022bdef735fb4c722705.tar.gz cpython-a50581228ebf9e9f1008022bdef735fb4c722705.tar.bz2 |
split compile.py into two files
add StackDepthFinder (and remove push/pop from CodeGen)
add several nodes, including Ellipsis, Bit&|^, Exec
Diffstat (limited to 'Lib/compiler/pyassem.py')
-rw-r--r-- | Lib/compiler/pyassem.py | 437 |
1 files changed, 437 insertions, 0 deletions
diff --git a/Lib/compiler/pyassem.py b/Lib/compiler/pyassem.py new file mode 100644 index 0000000..047836b --- /dev/null +++ b/Lib/compiler/pyassem.py @@ -0,0 +1,437 @@ +"""Assembler for Python bytecode + +The new module is used to create the code object. The following +attribute definitions are included from the reference manual: + +co_name gives the function name +co_argcount is the number of positional arguments (including + arguments with default values) +co_nlocals is the number of local variables used by the function + (including arguments) +co_varnames is a tuple containing the names of the local variables + (starting with the argument names) +co_code is a string representing the sequence of bytecode instructions +co_consts is a tuple containing the literals used by the bytecode +co_names is a tuple containing the names used by the bytecode +co_filename is the filename from which the code was compiled +co_firstlineno is the first line number of the function +co_lnotab is a string encoding the mapping from byte code offsets + to line numbers. see LineAddrTable below. +co_stacksize is the required stack size (including local variables) +co_flags is an integer encoding a number of flags for the + interpreter. There are four flags: + CO_OPTIMIZED -- uses load fast + CO_NEWLOCALS -- everything? + CO_VARARGS -- use *args + CO_VARKEYWORDS -- uses **args + +If a code object represents a function, the first item in co_consts is +the documentation string of the function, or None if undefined. +""" + +import sys +import dis +import new +import string + +import misc + +# flags for code objects +CO_OPTIMIZED = 0x0001 +CO_NEWLOCALS = 0x0002 +CO_VARARGS = 0x0004 +CO_VARKEYWORDS = 0x0008 + +class PyAssembler: + """Creates Python code objects + """ + + # XXX this class needs to major refactoring + + def __init__(self, args=(), name='?', filename='<?>', + docstring=None): + # XXX why is the default value for flags 3? + self.insts = [] + # used by makeCodeObject + self.argcount = len(args) + self.code = '' + self.consts = [docstring] + self.filename = filename + self.flags = CO_NEWLOCALS + self.name = name + self.names = [] + self.varnames = list(args) or [] + # lnotab support + self.firstlineno = 0 + self.lastlineno = 0 + self.last_addr = 0 + self.lnotab = '' + + def __repr__(self): + return "<bytecode: %d instrs>" % len(self.insts) + + def setFlags(self, val): + """XXX for module's function""" + self.flags = val + + def setOptimized(self): + self.flags = self.flags | CO_OPTIMIZED + + def setVarArgs(self): + self.flags = self.flags | CO_VARARGS + + def setKWArgs(self): + self.flags = self.flags | CO_VARKEYWORDS + + def getCurInst(self): + return len(self.insts) + + def getNextInst(self): + return len(self.insts) + 1 + + def dump(self, io=sys.stdout): + i = 0 + for inst in self.insts: + if inst[0] == 'SET_LINENO': + io.write("\n") + io.write(" %3d " % i) + if len(inst) == 1: + io.write("%s\n" % inst) + else: + io.write("%-15.15s\t%s\n" % inst) + i = i + 1 + + def makeCodeObject(self): + """Make a Python code object + + This creates a Python code object using the new module. This + seems simpler than reverse-engineering the way marshal dumps + code objects into .pyc files. One of the key difficulties is + figuring out how to layout references to code objects that + appear on the VM stack; e.g. + 3 SET_LINENO 1 + 6 LOAD_CONST 0 (<code object fact at 8115878 [...] + 9 MAKE_FUNCTION 0 + 12 STORE_NAME 0 (fact) + """ + + self._findOffsets() + lnotab = LineAddrTable() + for t in self.insts: + opname = t[0] + if len(t) == 1: + lnotab.addCode(chr(self.opnum[opname])) + elif len(t) == 2: + oparg = self._convertArg(opname, t[1]) + if opname == 'SET_LINENO': + lnotab.nextLine(oparg) + try: + hi, lo = divmod(oparg, 256) + except TypeError: + raise TypeError, "untranslated arg: %s, %s" % (opname, oparg) + lnotab.addCode(chr(self.opnum[opname]) + chr(lo) + + chr(hi)) + # why is a module a special case? + if self.flags == 0: + nlocals = 0 + else: + nlocals = len(self.varnames) + # XXX danger! can't pass through here twice + if self.flags & CO_VARKEYWORDS: + self.argcount = self.argcount - 1 + stacksize = findDepth(self.insts) + co = new.code(self.argcount, nlocals, stacksize, + self.flags, lnotab.getCode(), self._getConsts(), + tuple(self.names), tuple(self.varnames), + self.filename, self.name, self.firstlineno, + lnotab.getTable()) + return co + + def _getConsts(self): + """Return a tuple for the const slot of a code object + + Converts PythonVMCode objects to code objects + """ + l = [] + for elt in self.consts: + # XXX might be clearer to just as isinstance(CodeGen) + if hasattr(elt, 'asConst'): + l.append(elt.asConst()) + else: + l.append(elt) + return tuple(l) + + def _findOffsets(self): + """Find offsets for use in resolving StackRefs""" + self.offsets = [] + cur = 0 + for t in self.insts: + self.offsets.append(cur) + l = len(t) + if l == 1: + cur = cur + 1 + elif l == 2: + cur = cur + 3 + arg = t[1] + # XXX this is a total hack: for a reference used + # multiple times, we create a list of offsets and + # expect that we when we pass through the code again + # to actually generate the offsets, we'll pass in the + # same order. + if isinstance(arg, StackRef): + try: + arg.__offset.append(cur) + except AttributeError: + arg.__offset = [cur] + + def _convertArg(self, op, arg): + """Convert the string representation of an arg to a number + + The specific handling depends on the opcode. + + XXX This first implementation isn't going to be very + efficient. + """ + if op == 'SET_LINENO': + return arg + if op == 'LOAD_CONST': + return self._lookupName(arg, self.consts) + if op in self.localOps: + # make sure it's in self.names, but use the bytecode offset + self._lookupName(arg, self.names) + return self._lookupName(arg, self.varnames) + if op in self.globalOps: + return self._lookupName(arg, self.names) + if op in self.nameOps: + return self._lookupName(arg, self.names) + if op == 'COMPARE_OP': + return self.cmp_op.index(arg) + if self.hasjrel.has_elt(op): + offset = arg.__offset[0] + del arg.__offset[0] + return self.offsets[arg.resolve()] - offset + if self.hasjabs.has_elt(op): + return self.offsets[arg.resolve()] + return arg + + nameOps = ('STORE_NAME', 'IMPORT_NAME', 'IMPORT_FROM', + 'STORE_ATTR', 'LOAD_ATTR', 'LOAD_NAME', 'DELETE_NAME') + localOps = ('LOAD_FAST', 'STORE_FAST', 'DELETE_FAST') + globalOps = ('LOAD_GLOBAL', 'STORE_GLOBAL', 'DELETE_GLOBAL') + + def _lookupName(self, name, list, list2=None): + """Return index of name in list, appending if necessary + + Yicky hack: Second list can be used for lookup of local names + where the name needs to be added to varnames and names. + """ + if name in list: + return list.index(name) + else: + end = len(list) + list.append(name) + if list2 is not None: + list2.append(name) + return end + + # Convert some stuff from the dis module for local use + + cmp_op = list(dis.cmp_op) + hasjrel = misc.Set() + for i in dis.hasjrel: + hasjrel.add(dis.opname[i]) + hasjabs = misc.Set() + for i in dis.hasjabs: + hasjabs.add(dis.opname[i]) + + opnum = {} + for num in range(len(dis.opname)): + opnum[dis.opname[num]] = num + + # this version of emit + arbitrary hooks might work, but it's damn + # messy. + + def emit(self, *args): + self._emitDispatch(args[0], args[1:]) + self.insts.append(args) + + def _emitDispatch(self, type, args): + for func in self._emit_hooks.get(type, []): + func(self, args) + + _emit_hooks = {} + +class LineAddrTable: + """lnotab + + This class builds the lnotab, which is undocumented but described + by com_set_lineno in compile.c. Here's an attempt at explanation: + + For each SET_LINENO instruction after the first one, two bytes are + added to lnotab. (In some cases, multiple two-byte entries are + added.) The first byte is the distance in bytes between the + instruction for the last SET_LINENO and the current SET_LINENO. + The second byte is offset in line numbers. If either offset is + greater than 255, multiple two-byte entries are added -- one entry + for each factor of 255. + """ + + def __init__(self): + self.code = [] + self.codeOffset = 0 + self.firstline = 0 + self.lastline = 0 + self.lastoff = 0 + self.lnotab = [] + + def addCode(self, code): + self.code.append(code) + self.codeOffset = self.codeOffset + len(code) + + def nextLine(self, lineno): + if self.firstline == 0: + self.firstline = lineno + self.lastline = lineno + else: + # compute deltas + addr = self.codeOffset - self.lastoff + line = lineno - self.lastline + while addr > 0 or line > 0: + # write the values in 1-byte chunks that sum + # to desired value + trunc_addr = addr + trunc_line = line + if trunc_addr > 255: + trunc_addr = 255 + if trunc_line > 255: + trunc_line = 255 + self.lnotab.append(trunc_addr) + self.lnotab.append(trunc_line) + addr = addr - trunc_addr + line = line - trunc_line + self.lastline = lineno + self.lastoff = self.codeOffset + + def getCode(self): + return string.join(self.code, '') + + def getTable(self): + return string.join(map(chr, self.lnotab), '') + +class StackRef: + """Manage stack locations for jumps, loops, etc.""" + count = 0 + + def __init__(self, id=None, val=None): + if id is None: + id = StackRef.count + StackRef.count = StackRef.count + 1 + self.id = id + self.val = val + + def __repr__(self): + if self.val: + return "StackRef(val=%d)" % self.val + else: + return "StackRef(id=%d)" % self.id + + def bind(self, inst): + self.val = inst + + def resolve(self): + if self.val is None: + print "UNRESOLVE REF", self + return 0 + return self.val + +class StackDepthTracker: + # XXX need to keep track of stack depth on jumps + + def findDepth(self, insts): + depth = 0 + maxDepth = 0 + for i in insts: + opname = i[0] + delta = self.effect.get(opname, 0) + if delta > 1: + depth = depth + delta + elif delta < 0: + if depth > maxDepth: + maxDepth = depth + depth = depth + delta + else: + if depth > maxDepth: + maxDepth = depth + # now check patterns + for pat, delta in self.patterns: + if opname[:len(pat)] == pat: + depth = depth + delta + break + # if we still haven't found a match + if delta == 0: + meth = getattr(self, opname) + depth = depth + meth(i[1]) + if depth < 0: + depth = 0 + return maxDepth + + effect = { + 'POP_TOP': -1, + 'DUP_TOP': 1, + 'SLICE+1': -1, + 'SLICE+2': -1, + 'SLICE+3': -2, + 'STORE_SLICE+0': -1, + 'STORE_SLICE+1': -2, + 'STORE_SLICE+2': -2, + 'STORE_SLICE+3': -3, + 'DELETE_SLICE+0': -1, + 'DELETE_SLICE+1': -2, + 'DELETE_SLICE+2': -2, + 'DELETE_SLICE+3': -3, + 'STORE_SUBSCR': -3, + 'DELETE_SUBSCR': -2, + # PRINT_EXPR? + 'PRINT_ITEM': -1, + 'LOAD_LOCALS': 1, + 'RETURN_VALUE': -1, + 'EXEC_STMT': -2, + 'BUILD_CLASS': -2, + 'STORE_NAME': -1, + 'STORE_ATTR': -2, + 'DELETE_ATTR': -1, + 'STORE_GLOBAL': -1, + 'BUILD_MAP': 1, + 'COMPARE_OP': -1, + 'STORE_FAST': -1, + } + # use pattern match + patterns = [ + ('BINARY_', -1), + ('LOAD_', 1), + ('IMPORT_', 1), + ] + # special cases + + #: UNPACK_TUPLE, UNPACK_LIST, BUILD_TUPLE, + # BUILD_LIST, CALL_FUNCTION, MAKE_FUNCTION, BUILD_SLICE + def UNPACK_TUPLE(self, count): + return count + def UNPACK_LIST(self, count): + return count + def BUILD_TUPLE(self, count): + return -count + def BUILD_LIST(self, count): + return -count + def CALL_FUNCTION(self, argc): + hi, lo = divmod(argc, 256) + return lo + hi * 2 + def MAKE_FUNCTION(self, argc): + return -argc + def BUILD_SLICE(self, argc): + if argc == 2: + return -1 + elif argc == 3: + return -2 + +findDepth = StackDepthTracker().findDepth |