now produces valid pyc files for at least a trivial subset of the

language.

CodeGenerator:
* modify to track stack depth
* add emit method that calls PythonVMCode's makeCodeObject
* thread filenames through in hackish way
* set flags for code objects for modules and functions
  XXX the docs for the flags seem out of date and/or incomplete

PythonVMCode:
* add doc string describing the elements of a real code object

LineAddrTable:
* creates an lnotab (not quite correctly though)
author: Jeremy Hylton <jeremy@alum.mit.edu> 2000-02-08 19:01:29 (GMT)
committer: Jeremy Hylton <jeremy@alum.mit.edu> 2000-02-08 19:01:29 (GMT)
commit: 53187f32ebbb6156fa8e3ddf834dfde74cfa3208 (patch)
tree: de9996353e090d4a20420bd89f7d78bbfaf4a465 /Tools/compiler
parent: aa9d2d6123a54dc1ed7f782fc935abcfadb54167 (diff)
download: cpython-53187f32ebbb6156fa8e3ddf834dfde74cfa3208.zip
cpython-53187f32ebbb6156fa8e3ddf834dfde74cfa3208.tar.gz
cpython-53187f32ebbb6156fa8e3ddf834dfde74cfa3208.tar.bz2
1 files changed, 306 insertions, 80 deletions
diff --git a/Tools/compiler/compiler/pycodegen.py b/Tools/compiler/compiler/pycodegen.py
index 067ebc4..ee0c75b 100644
--- a/Tools/compiler/compiler/pycodegen.py
+++ b/Tools/compiler/compiler/pycodegen.py
@@ -11,6 +11,10 @@ import misc
 import marshal
 import new
 import string
+import sys
+import os
+import stat
+import struct
 
 def parse(path):
     f = open(path)
@@ -60,7 +64,7 @@ class ASTVisitor:
     XXX Perhaps I can use a postorder walk for the code generator?
     """
 
-    VERBOSE = 0
+    VERBOSE = 1
 
     def __init__(self):
 	self.node = None
@@ -101,9 +105,34 @@ class ASTVisitor:
             return meth(node)
 
 class CodeGenerator:
-    def __init__(self):
-	self.code = PythonVMCode()
+    def __init__(self, filename=None):
+        self.filename = filename
+	self.code = PythonVMCode(filename=filename)
+        self.code.setFlags(0)
 	self.locals = misc.Stack()
+        # track the current and max stack size
+        # XXX does this belong here or in the PythonVMCode?
+        self.curStack = 0
+        self.maxStack = 0
+
+    def emit(self):
+        """Create a Python code object
+
+        XXX It is confusing that this method isn't related to the
+        method named emit in the PythonVMCode.
+        """
+        return self.code.makeCodeObject(self.maxStack)
+
+    def push(self, n):
+        self.curStack = self.curStack + n
+        if self.curStack > self.maxStack:
+            self.maxStack = self.curStack
+
+    def pop(self, n):
+        if n >= self.curStack:
+            self.curStack = self.curStack - n
+        else:
+            self.curStack = 0
 
     def visitDiscard(self, node):
         return 1
@@ -112,16 +141,16 @@ class CodeGenerator:
 	lnf = walk(node.node, LocalNameFinder())
 	self.locals.push(lnf.getLocals())
         self.visit(node.node)
-        self.code.emit('LOAD_CONST', 'None')
+        self.code.emit('LOAD_CONST', None)
         self.code.emit('RETURN_VALUE')
         return 1
 
     def visitFunction(self, node):
-        codeBody = NestedCodeGenerator(node.code, node.argnames)
-        walk(node.code, codeBody)
+        codeBody = NestedCodeGenerator(node, filename=self.filename)
+        walk(node, codeBody)
         self.code.setLineNo(node.lineno)
-        self.code.emit('LOAD_CONST', codeBody.code)
-        self.code.emit('MAKE_FUNCTION')
+        self.code.emit('LOAD_CONST', codeBody)
+        self.code.emit('MAKE_FUNCTION', 0)
         self.code.emit('STORE_NAME', node.name)
         return 1
 
@@ -212,6 +241,7 @@ class CodeGenerator:
 	self.visit(node.left)
 	self.visit(node.right)
 	self.code.emit(op)
+        self.pop(1)
 	return 1
 
     def visitAdd(self, node):
@@ -232,9 +262,11 @@ class CodeGenerator:
 	    self.code.loadFast(node.name)
 	else:
 	    self.code.loadGlobal(node.name)
+        self.push(1)
 
     def visitConst(self, node):
 	self.code.loadConst(node.value)
+        self.push(1)
 
     def visitReturn(self, node):
 	self.code.setLineNo(node.lineno)
@@ -262,6 +294,7 @@ class CodeGenerator:
 	for child in node.nodes:
 	    self.visit(child)
 	    self.code.emit('PRINT_ITEM')
+        self.pop(len(node.nodes))
 	return 1
 
     def visitPrintnl(self, node):
@@ -276,26 +309,38 @@ class NestedCodeGenerator(CodeGenerator):
     """
     super_init = CodeGenerator.__init__
     
-    def __init__(self, code, args):
+    def __init__(self, func, filename='<?>'):
         """code and args of function or class being walked
 
         XXX need to separately pass to ASTVisitor.  the constructor
         only uses the code object to find the local names
+
+        Copies code form parent __init__ rather than calling it.
         """
-        self.super_init()
-        lnf = walk(code, LocalNameFinder(args))
+        self.name = func.name
+        self.super_init(filename)
+        args = func.argnames
+	self.code = PythonVMCode(len(args), name=func.name,
+                                 filename=filename) 
+        if func.varargs:
+            self.code.setVarArgs()
+        if func.kwargs:
+            self.code.setKWArgs()
+        lnf = walk(func.code, LocalNameFinder(args))
         self.locals.push(lnf.getLocals())
 
+    def __repr__(self):
+        return "<NestedCodeGenerator: %s>" % self.name
+
     def visitFunction(self, node):
 	lnf = walk(node.code, LocalNameFinder(node.argnames))
 	self.locals.push(lnf.getLocals())
         # XXX need to handle def foo((a, b)):
 	self.code.setLineNo(node.lineno)
         self.visit(node.code)
-        self.code.emit('LOAD_CONST', 'None')
+        self.code.emit('LOAD_CONST', None)
         self.code.emit('RETURN_VALUE')
         return 1
-	
 
 class LocalNameFinder:
     def __init__(self, names=()):
@@ -353,64 +398,86 @@ class ForwardRef:
     def resolve(self):
 	return self.val
 
-class CompiledModule:
-    """Store the code object for a compiled module
+def add_hook(hooks, type, meth):
+    """Helper function for PythonVMCode _emit_hooks"""
+    l = hooks.get(type, [])
+    l.append(meth)
+    hooks[type] = l
 
-    XXX Not clear how the code objects will be stored.  Seems possible
-    that a single code attribute is sufficient, because it will
-    contains references to all the need code objects.  That might be
-    messy, though.
+class PythonVMCode:
+    """Creates Python code objects
+    
+    The new module is used to create the code object.  The following
+    attribute definitions are included from the reference manual:
+        
+    co_name gives the function name
+    co_argcount is the number of positional arguments (including
+        arguments with default values) 
+    co_nlocals is the number of local variables used by the function
+        (including arguments)  
+    co_varnames is a tuple containing the names of the local variables
+        (starting with the argument names) 
+    co_code is a string representing the sequence of bytecode instructions 
+    co_consts is a tuple containing the literals used by the bytecode
+    co_names is a tuple containing the names used by the bytecode
+    co_filename is the filename from which the code was compiled
+    co_firstlineno is the first line number of the function
+    co_lnotab is a string encoding the mapping from byte code offsets
+        to line numbers (for detais see the source code of the
+        interpreter)
+        see code com_set_lineno and com_add_lnotab
+        it's a string with 2bytes per set_lineno
+        
+    co_stacksize is the required stack size (including local variables)
+    co_flags is an integer encoding a number of flags for the
+        interpreter.
+
+    The following flag bits are defined for co_flags: bit 2 is set if
+    the function uses the "*arguments" syntax to accept an arbitrary
+    number of positional arguments; bit 3 is set if the function uses
+    the "**keywords" syntax to accept arbitrary keyword arguments;
+    other bits are used internally or reserved for future use.
+
+    If a code object represents a function, the first item in
+    co_consts is the documentation string of the function, or None if
+    undefined.
     """
-    MAGIC = (20121 | (ord('\r')<<16) | (ord('\n')<<24))
-
-    def __init__(self):
-        self.code = None
 
-    def addCode(self, code):
-        """addCode(self: SelfType, code: PythonVMCode)"""
-	
-    def dump(self, path):
-        """create a .pyc file"""
-        f = open(path, 'wb')
-        f.write(self._pyc_header())
-        marshal.dump(self.code, f)
-        f.close()
-        
-    def _pyc_header(self, path):
-        # compile.c uses marshal to write a long directly, with
-        # calling the interface that would also generate a 1-byte code
-        # to indicate the type of the value.  simplest way to get the
-        # same effect is to call marshal and then skip the code.
-        buf = marshal.dumps(self.MAGIC)[1:]
-        # skip the mtime for now, since I don't have the write
-        # structure to pass the filename being compiled into this
-        # instance 
-        return buf + chr(0) * 4
-	
-class PythonVMCode:
+    # XXX flag bits
+    VARARGS = 0x04
+    KWARGS = 0x08
 
-    def __init__(self):
+    def __init__(self, argcount=0, name='?', filename='<?>',
+                 docstring=None):
+        # XXX why is the default value for flags 3?
 	self.insts = []
         # used by makeCodeObject
-        self.argcount = 0
+        self.argcount = argcount
         self.code = ''
-        self.consts = []
-        self.filename = ''
-        self.firstlineno = 0
-        self.flags = 0
-        self.lnotab = None
-        self.name = ''
+        self.consts = [docstring]
+        self.filename = filename
+        self.flags = 3
+        self.name = name
         self.names = []
-        self.nlocals = 0
-        self.stacksize = 2
         self.varnames = []
+        # lnotab support
+        self.firstlineno = 0
+        self.lastlineno = 0
+        self.last_addr = 0
+        self.lnotab = ''
 
     def __repr__(self):
         return "<bytecode: %d instrs>" % len(self.insts)
 
-    def emit(self, *args):
-	print "emit", args
-	self.insts.append(args)
+    def setFlags(self, val):
+        """XXX for module's function"""
+        self.flags = 0
+
+    def setVarArgs(self):
+        self.flags = self.flags | self.VARARGS
+
+    def setKWArgs(self):
+        self.flags = self.flags | self.KWARGS
 
     def getCurInst(self):
 	return len(self.insts)
@@ -418,23 +485,70 @@ class PythonVMCode:
     def getNextInst(self):
 	return len(self.insts) + 1
 
-    def convert(self):
-	"""Convert human-readable names to real bytecode"""
-	pass
+    def dump(self, io=sys.stdout):
+        i = 0
+        for inst in self.insts:
+            if inst[0] == 'SET_LINENO':
+                io.write("\n")
+            io.write("    %3d " % i)
+            if len(inst) == 1:
+                io.write("%s\n" % inst)
+            else:
+                io.write("%-15.15s\t%s\n" % inst)
+            i = i + 1
+
+    def makeCodeObject(self, stacksize):
+        """Make a Python code object
+
+        This creates a Python code object using the new module.  This
+        seems simpler than reverse-engineering the way marshal dumps
+        code objects into .pyc files.  One of the key difficulties is
+        figuring out how to layout references to code objects that
+        appear on the VM stack; e.g.
+          3 SET_LINENO          1
+          6 LOAD_CONST          0 (<code object fact at 8115878 [...]
+          9 MAKE_FUNCTION       0
+         12 STORE_NAME          0 (fact)
 
-    def makeCodeObject(self):
-        """Make a Python code object"""
-        code = []
+        """
+        
         self._findOffsets()
+        lnotab = LineAddrTable()
         for t in self.insts:
             opname = t[0]
             if len(t) == 1:
-                code.append(chr(self.opnum[opname]))
+                lnotab.addCode(chr(self.opnum[opname]))
             elif len(t) == 2:
                 oparg = self._convertArg(opname, t[1])
+                if opname == 'SET_LINENO':
+                    lnotab.nextLine(oparg)
                 hi, lo = divmod(oparg, 256)
-                code.append(chr(self.opnum[opname]) + chr(lo) + chr(hi))
-        return string.join(code, '')
+                lnotab.addCode(chr(self.opnum[opname]) + chr(lo) +
+                               chr(hi))
+        # why is a module a special case?
+        if self.flags == 0:
+            nlocals = 0
+        else:
+            nlocals = len(self.varnames)
+        co = new.code(self.argcount, nlocals, stacksize,
+                      self.flags, lnotab.getCode(), self._getConsts(),
+                      tuple(self.names), tuple(self.varnames),
+                      self.filename, self.name, self.firstlineno,
+                      lnotab.getTable())
+        return co
+
+    def _getConsts(self):
+        """Return a tuple for the const slot of a code object
+
+        Converts PythonVMCode objects to code objects
+        """
+        l = []
+        for elt in self.consts:
+            if isinstance(elt, CodeGenerator):
+                l.append(elt.emit())
+            else:
+                l.append(elt)
+        return tuple(l)
 
     def _findOffsets(self):
         """Find offsets for use in resolving ForwardRefs"""
@@ -464,7 +578,10 @@ class PythonVMCode:
         if op == 'LOAD_CONST':
             return self._lookupName(arg, self.consts)
         if op == 'LOAD_FAST':
-            return self._lookupName(arg, self.varnames, self.names)
+            if arg in self.names:
+                return self._lookupName(arg, self.varnames)
+            else:
+                return self._lookupName(arg, self.varnames, self.names)
         if op == 'LOAD_GLOBAL':
             return self._lookupName(arg, self.names)
         if op == 'STORE_NAME':
@@ -475,7 +592,6 @@ class PythonVMCode:
             return self.offsets[arg.resolve()]
         if self.hasjabs.has_elt(op):
             return self.offsets[arg.resolve()] - arg.__offset
-        print op, arg
         return arg
 
     def _lookupName(self, name, list, list2=None):
@@ -511,6 +627,11 @@ class PythonVMCode:
     # it seems redundant to add a function for each opcode,
     # particularly because the method and opcode basically have the
     # same name.
+    # on the other hand, we need to track things like stack depth in
+    # order to generator code objects.  if we wrap instructions in a
+    # method, we get an easy way to track these.  a simpler
+    # approach, however, would be to define hooks that can be called
+    # by emit.
 
     def setLineNo(self, num):
 	self.emit('SET_LINENO', num)
@@ -557,15 +678,120 @@ class PythonVMCode:
     def callFunction(self, num):
 	self.emit('CALL_FUNCTION', num)
 
+    # this version of emit + arbitrary hooks might work, but it's damn
+    # messy.
+
+    def emit(self, *args):
+        self._emitDispatch(args[0], args[1:])
+	self.insts.append(args)
+
+    def _emitDispatch(self, type, args):
+        for func in self._emit_hooks.get(type, []):
+            func(self, args)
+
+    _emit_hooks = {}
+
+class LineAddrTable:
+    """lnotab
+    
+    This class builds the lnotab, which is undocumented but described
+    by com_set_lineno in compile.c.  Here's an attempt at explanation:
+
+    For each SET_LINENO instruction after the first one, two bytes are
+    added to lnotab.  (In some cases, multiple two-byte entries are
+    added.)  The first byte is the distance in bytes between the
+    instruction for the last SET_LINENO and the current SET_LINENO.
+    The second byte is offset in line numbers.  If either offset is
+    greater than 255, multiple two-byte entries are added -- one entry
+    for each factor of 255.
+    """
+
+    def __init__(self):
+        self.code = []
+        self.codeOffset = 0
+        self.firstline = 0
+        self.lastline = 0
+        self.lastoff = 0
+        self.lnotab = []
+
+    def addCode(self, code):
+        self.code.append(code)
+        self.codeOffset = self.codeOffset + len(code)
+
+    def nextLine(self, lineno):
+        if self.firstline == 0:
+            self.firstline = lineno
+            self.lastline = lineno
+        else:
+            # compute deltas
+            addr = self.codeOffset - self.lastoff
+            line = lineno - self.lastline
+            while addr > 0 or line > 0:
+                # write the values in 1-byte chunks that sum
+                # to desired value
+                trunc_addr = addr
+                trunc_line = line
+                if trunc_addr > 255:
+                    trunc_addr = 255
+                if trunc_line > 255:
+                    trunc_line = 255
+                self.lnotab.append(trunc_addr)
+                self.lnotab.append(trunc_line)
+                addr = addr - trunc_addr
+                line = line - trunc_line
+            self.lastline = lineno
+            self.lastoff = self.codeOffset
+
+    def getCode(self):
+        return string.join(self.code, '')
+
+    def getTable(self):
+        return string.join(map(chr, self.lnotab), '')
+    
+class CompiledModule:
+    """Store the code object for a compiled module
+
+    XXX Not clear how the code objects will be stored.  Seems possible
+    that a single code attribute is sufficient, because it will
+    contains references to all the need code objects.  That might be
+    messy, though.
+    """
+    MAGIC = (20121 | (ord('\r')<<16) | (ord('\n')<<24))
+
+    def __init__(self, source, filename):
+        self.source = source
+        self.filename = filename
+
+    def compile(self):
+        t = transformer.Transformer()
+        self.ast = t.parsesuite(self.source)
+        cg = CodeGenerator(self.filename)
+        walk(self.ast, cg)
+        self.code = cg.emit()
+
+    def dump(self, path):
+        """create a .pyc file"""
+        f = open(path, 'wb')
+        f.write(self._pyc_header())
+        marshal.dump(self.code, f)
+        f.close()
+        
+    def _pyc_header(self):
+        # compile.c uses marshal to write a long directly, with
+        # calling the interface that would also generate a 1-byte code
+        # to indicate the type of the value.  simplest way to get the
+        # same effect is to call marshal and then skip the code.
+        magic = marshal.dumps(self.MAGIC)[1:]
+        mtime = os.stat(self.filename)[stat.ST_MTIME]
+        mtime = struct.pack('i', mtime)
+        return magic + mtime
+	
 if __name__ == "__main__":
-    tree = parse('test.py')
-    cg = CodeGenerator()
-    ASTVisitor.VERBOSE = 1
-    w = walk(tree, cg)
-    w.VERBOSE = 1
-    for i in range(len(cg.code.insts)):
-	inst = cg.code.insts[i]
-	if inst[0] == 'SET_LINENO':
-	    print
-	print "%4d" % i, inst
-    code = cg.code.makeCodeObject()
+    if len(sys.argv) > 1:
+        filename = sys.argv[1]
+    else:
+        filename = 'test.py'
+    buf = open(filename).read()
+    mod = CompiledModule(buf, filename)
+    mod.compile()
+    mod.dump(filename + 'c')
author	Jeremy Hylton <jeremy@alum.mit.edu>	2000-02-08 19:01:29 (GMT)
committer	Jeremy Hylton <jeremy@alum.mit.edu>	2000-02-08 19:01:29 (GMT)
commit	53187f32ebbb6156fa8e3ddf834dfde74cfa3208 (patch)
tree	de9996353e090d4a20420bd89f7d78bbfaf4a465 /Tools/compiler
parent	aa9d2d6123a54dc1ed7f782fc935abcfadb54167 (diff)
download	cpython-53187f32ebbb6156fa8e3ddf834dfde74cfa3208.zip cpython-53187f32ebbb6156fa8e3ddf834dfde74cfa3208.tar.gz cpython-53187f32ebbb6156fa8e3ddf834dfde74cfa3208.tar.bz2