diff options
author | Fred Drake <fdrake@acm.org> | 2001-09-27 20:06:07 (GMT) |
---|---|---|
committer | Fred Drake <fdrake@acm.org> | 2001-09-27 20:06:07 (GMT) |
commit | e2f9917f9f2c573f84605d75d70ef4fb3d54a740 (patch) | |
tree | 19039f1764ba5919de36782080db88e4118b0c6c /Doc | |
parent | 876389e5d8c752a4e9088bd6b02863679b4a5047 (diff) | |
download | cpython-e2f9917f9f2c573f84605d75d70ef4fb3d54a740.zip cpython-e2f9917f9f2c573f84605d75d70ef4fb3d54a740.tar.gz cpython-e2f9917f9f2c573f84605d75d70ef4fb3d54a740.tar.bz2 |
Migrate the compiler documentation from the Tools/compiler/doc/ directory.
Changes made to make it work in the new location.
Diffstat (limited to 'Doc')
-rw-r--r-- | Doc/lib/asttable.tex | 253 | ||||
-rw-r--r-- | Doc/lib/compiler.tex | 336 |
2 files changed, 589 insertions, 0 deletions
diff --git a/Doc/lib/asttable.tex b/Doc/lib/asttable.tex new file mode 100644 index 0000000..7f6ba9f --- /dev/null +++ b/Doc/lib/asttable.tex @@ -0,0 +1,253 @@ +\begin{longtableiii}{lll}{class}{Node type}{Attribute}{Value} + +\lineiii{Add}{\member{left}}{left operand} +\lineiii{}{\member{right}}{right operand} +\hline + +\lineiii{And}{\member{nodes}}{list of operands} +\hline + +\lineiii{AssAttr}{}{\emph{attribute as target of assignment}} +\lineiii{}{\member{expr}}{expression on the left-hand side of the dot} +\lineiii{}{\member{attrname}}{the attribute name, a string} +\lineiii{}{\member{flags}}{XXX} +\hline + +\lineiii{AssList}{\member{nodes}}{list of list elements being assigned to} +\hline + +\lineiii{AssName}{\member{name}}{name being assigned to} +\lineiii{}{\member{flags}}{XXX} +\hline + +\lineiii{AssTuple}{\member{nodes}}{list of tuple elements being assigned to} +\hline + +\lineiii{Assert}{\member{test}}{the expression to be tested} +\lineiii{}{\member{fail}}{the value of the \exception{AssertionError}} +\hline + +\lineiii{Assign}{\member{nodes}}{a list of assignment targets, one per equal sign} +\lineiii{}{\member{expr}}{the value being assigned} +\hline + +\lineiii{AugAssign}{\member{node}}{} +\lineiii{}{\member{op}}{} +\lineiii{}{\member{expr}}{} +\hline + +\lineiii{Backquote}{\member{expr}}{} +\hline + +\lineiii{Bitand}{\member{nodes}}{} +\hline + +\lineiii{Bitor}{\member{nodes}}{} +\hline + +\lineiii{Bitxor}{\member{nodes}}{} +\hline + +\lineiii{Break}{}{} +\hline + +\lineiii{CallFunc}{\member{node}}{expression for the callee} +\lineiii{}{\member{args}}{a list of arguments} +\lineiii{}{\member{star_args}}{the extended *-arg value} +\lineiii{}{\member{dstar_args}}{the extended **-arg value} +\hline + +\lineiii{Class}{\member{name}}{the name of the class, a string} +\lineiii{}{\member{bases}}{a list of base classes} +\lineiii{}{\member{doc}}{doc string, a string or \code{None}} +\lineiii{}{\member{code}}{the body of the class statement} +\hline + +\lineiii{Compare}{\member{expr}}{} +\lineiii{}{\member{ops}}{} +\hline + +\lineiii{Const}{\member{value}}{} +\hline + +\lineiii{Continue}{}{} +\hline + +\lineiii{Dict}{\member{items}}{} +\hline + +\lineiii{Discard}{\member{expr}}{} +\hline + +\lineiii{Div}{\member{left}}{} +\lineiii{}{\member{right}}{} +\hline + +\lineiii{Ellipsis}{}{} +\hline + +\lineiii{Exec}{\member{expr}}{} +\lineiii{}{\member{locals}}{} +\lineiii{}{\member{globals}}{} +\hline + +\lineiii{For}{\member{assign}}{} +\lineiii{}{\member{list}}{} +\lineiii{}{\member{body}}{} +\lineiii{}{\member{else_}}{} +\hline + +\lineiii{From}{\member{modname}}{} +\lineiii{}{\member{names}}{} +\hline + +\lineiii{Function}{\member{name}}{name used in def, a string} +\lineiii{}{\member{argnames}}{list of argument names, as strings} +\lineiii{}{\member{defaults}}{list of default values} +\lineiii{}{\member{flags}}{xxx} +\lineiii{}{\member{doc}}{doc string, a string or \code{None}} +\lineiii{}{\member{code}}{the body of the function} +\hline + +\lineiii{Getattr}{\member{expr}}{} +\lineiii{}{\member{attrname}}{} +\hline + +\lineiii{Global}{\member{names}}{} +\hline + +\lineiii{If}{\member{tests}}{} +\lineiii{}{\member{else_}}{} +\hline + +\lineiii{Import}{\member{names}}{} +\hline + +\lineiii{Invert}{\member{expr}}{} +\hline + +\lineiii{Keyword}{\member{name}}{} +\lineiii{}{\member{expr}}{} +\hline + +\lineiii{Lambda}{\member{argnames}}{} +\lineiii{}{\member{defaults}}{} +\lineiii{}{\member{flags}}{} +\lineiii{}{\member{code}}{} +\hline + +\lineiii{LeftShift}{\member{left}}{} +\lineiii{}{\member{right}}{} +\hline + +\lineiii{List}{\member{nodes}}{} +\hline + +\lineiii{ListComp}{\member{expr}}{} +\lineiii{}{\member{quals}}{} +\hline + +\lineiii{ListCompFor}{\member{assign}}{} +\lineiii{}{\member{list}}{} +\lineiii{}{\member{ifs}}{} +\hline + +\lineiii{ListCompIf}{\member{test}}{} +\hline + +\lineiii{Mod}{\member{left}}{} +\lineiii{}{\member{right}}{} +\hline + +\lineiii{Module}{\member{doc}}{doc string, a string or \code{None}} +\lineiii{}{\member{node}}{body of the module, a \class{Stmt}} +\hline + +\lineiii{Mul}{\member{left}}{} +\lineiii{}{\member{right}}{} +\hline + +\lineiii{Name}{\member{name}}{} +\hline + +\lineiii{Not}{\member{expr}}{} +\hline + +\lineiii{Or}{\member{nodes}}{} +\hline + +\lineiii{Pass}{}{} +\hline + +\lineiii{Power}{\member{left}}{} +\lineiii{}{\member{right}}{} +\hline + +\lineiii{Print}{\member{nodes}}{} +\lineiii{}{\member{dest}}{} +\hline + +\lineiii{Printnl}{\member{nodes}}{} +\lineiii{}{\member{dest}}{} +\hline + +\lineiii{Raise}{\member{expr1}}{} +\lineiii{}{\member{expr2}}{} +\lineiii{}{\member{expr3}}{} +\hline + +\lineiii{Return}{\member{value}}{} +\hline + +\lineiii{RightShift}{\member{left}}{} +\lineiii{}{\member{right}}{} +\hline + +\lineiii{Slice}{\member{expr}}{} +\lineiii{}{\member{flags}}{} +\lineiii{}{\member{lower}}{} +\lineiii{}{\member{upper}}{} +\hline + +\lineiii{Sliceobj}{\member{nodes}}{list of statements} +\hline + +\lineiii{Stmt}{\member{nodes}}{} +\hline + +\lineiii{Sub}{\member{left}}{} +\lineiii{}{\member{right}}{} +\hline + +\lineiii{Subscript}{\member{expr}}{} +\lineiii{}{\member{flags}}{} +\lineiii{}{\member{subs}}{} +\hline + +\lineiii{TryExcept}{\member{body}}{} +\lineiii{}{\member{handlers}}{} +\lineiii{}{\member{else_}}{} +\hline + +\lineiii{TryFinally}{\member{body}}{} +\lineiii{}{\member{final}}{} +\hline + +\lineiii{Tuple}{\member{nodes}}{} +\hline + +\lineiii{UnaryAdd}{\member{expr}}{} +\hline + +\lineiii{UnarySub}{\member{expr}}{} +\hline + +\lineiii{While}{\member{test}}{} +\lineiii{}{\member{body}}{} +\lineiii{}{\member{else_}}{} +\hline + +\lineiii{Yield}{\member{value}}{} +\hline + +\end{longtableiii} diff --git a/Doc/lib/compiler.tex b/Doc/lib/compiler.tex new file mode 100644 index 0000000..280ee0f --- /dev/null +++ b/Doc/lib/compiler.tex @@ -0,0 +1,336 @@ +\chapter{Python compiler package \label{compiler}} + +\sectionauthor{Jeremy Hylton}{jeremy@zope.com} + + +The Python compiler package is a tool for analyzing Python source code +and generating Python bytecode. The compiler contains libraries to +generate an abstract syntax tree from Python source code and to +generate Python bytecode from the tree. + +The \refmodule{compiler} package is a Python source to bytecode +translator written in Python. It uses the built-in parser and +standard \refmodule{parser} module to generated a concrete syntax +tree. This tree is used to generate an abstract syntax tree (AST) and +then Python bytecode. + +The full functionality of the package duplicates the builtin compiler +provided with the Python interpreter. It is intended to match its +behavior almost exactly. Why implement another compiler that does the +same thing? The package is useful for a variety of purposes. It can +be modified more easily than the builtin compiler. The AST it +generates is useful for analyzing Python source code. + +This chapter explains how the various components of the +\refmodule{compiler} package work. It blends reference material with +a tutorial. + +The following modules are part of the \refmodule{compiler} package: + +\localmoduletable + + +\subsection{The basic interface} + +\declaremodule{}{compiler} + +The top-level of the package defines four functions. If you import +\module{compiler}, you will get these functions and a collection of +modules contained in the package. + +\begin{funcdesc}{parse}{buf} +Returns an abstract syntax tree for the Python source code in \var{buf}. +The function raises SyntaxError if there is an error in the source +code. The return value is a \class{compiler.ast.Module} instance that +contains the tree. +\end{funcdesc} + +\begin{funcdesc}{parseFile}{path} +Return an abstract syntax tree for the Python source code in the file +specified by \var{path}. It is equivalent to +\code{parse(open(\var{path}).read())}. +\end{funcdesc} + +\begin{funcdesc}{walk}{ast, visitor\optional{, verbose}} +Do a pre-order walk over the abstract syntax tree \var{ast}. Call the +appropriate method on the \var{visitor} instance for each node +encountered. +\end{funcdesc} + +\begin{funcdesc}{compile}{path} +Compile the file \var{path} and generate the corresponding \file{.pyc} +file. +\end{funcdesc} + +The \module{compiler} package contains the following modules: +\refmodule[compiler.ast]{ast}, \module{consts}, \module{future}, +\module{misc}, \module{pyassem}, \module{pycodegen}, \module{symbols}, +\module{transformer}, and \refmodule[compiler.visitor]{visitor}. + +\subsection{Limitations} + +There are some problems with the error checking of the compiler +package. The interpreter detects syntax errors in two distinct +phases. One set of errors is detected by the interpreter's parser, +the other set by the compiler. The compiler package relies on the +interpreter's parser, so it get the first phases of error checking for +free. It implements the second phase itself, and that implement is +incomplete. For example, the compiler package does not raise an error +if a name appears more than once in an argument list: +\code{def f(x, x): ...} + +A future version of the compiler should fix these problems. + +\section{Python Abstract Syntax} + +The \module{compiler.ast} module defines an abstract syntax for +Python. In the abstract syntax tree, each node represents a syntactic +construct. The root of the tree is \class{Module} object. + +The abstract syntax offers a higher level interface to parsed Python +source code. The \ulink{\module{parser}} +{http://www.python.org/doc/current/lib/module-parser.html} +module and the compiler written in C for the Python interpreter use a +concrete syntax tree. The concrete syntax is tied closely to the +grammar description used for the Python parser. Instead of a single +node for a construct, there are often several levels of nested nodes +that are introduced by Python's precedence rules. + +The abstract syntax tree is created by the +\module{compiler.transformer} module. The transformer relies on the +builtin Python parser to generate a concrete syntax tree. It +generates an abstract syntax tree from the concrete tree. + +The \module{transformer} module was created by Greg +Stein\index{Stein, Greg} and Bill Tutt\index{Tutt, Bill} for an +experimental Python-to-C compiler. The current version contains a +number of modifications and improvements, but the basic form of the +abstract syntax and of the transformer are due to Stein and Tutt. + +\subsection{AST Nodes} + +\declaremodule{}{compiler.ast} + +The \module{compiler.ast} module is generated from a text file that +describes each node type and its elements. Each node type is +represented as a class that inherits from the abstract base class +\class{compiler.ast.Node} and defines a set of named attributes for +child nodes. + +\begin{classdesc}{Node}{} + + The \class{Node} instances are created automatically by the parser + generator. The recommended interface for specific \class{Node} + instances is to use the public attributes to access child nodes. A + public attribute may be bound to a single node or to a sequence of + nodes, depending on the \class{Node} type. For example, the + \member{bases} attribute of the \class{Class} node, is bound to a + list of base class nodes, and the \member{doc} attribute is bound to + a single node. + + Each \class{Node} instance has a \member{lineno} attribute which may + be \code{None}. XXX Not sure what the rules are for which nodes + will have a useful lineno. +\end{classdesc} + +All \class{Node} objects offer the following methods: + +\begin{methoddesc}{getChildren}{} + Returns a flattened list of the child nodes and objects in the + order they occur. Specifically, the order of the nodes is the + order in which they appear in the Python grammar. Not all of the + children are \class{Node} instances. The names of functions and + classes, for example, are plain strings. +\end{methoddesc} + +\begin{methoddesc}{getChildNodes}{} + Returns a flattened list of the child nodes in the order they + occur. This method is like \method{getChildren()}, except that it + only returns those children that are \class{Node} instances. +\end{methoddesc} + +Two examples illustrate the general structure of \class{Node} +classes. The \keyword{while} statement is defined by the following +grammar production: + +\begin{verbatim} +while_stmt: "while" expression ":" suite + ["else" ":" suite] +\end{verbatim} + +The \class{While} node has three attributes: \member{test}, +\member{body}, and \member{else_}. (If the natural name for an +attribute is also a Python reserved word, it can't be used as an +attribute name. An underscore is appended to the word to make it a +legal identifier, hence \member{else_} instead of \keyword{else}.) + +The \keyword{if} statement is more complicated because it can include +several tests. + +\begin{verbatim} +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +\end{verbatim} + +The \class{If} node only defines two attributes: \member{tests} and +\member{else_}. The \member{tests} attribute is a sequence of test +expression, consequent body pairs. There is one pair for each +\keyword{if}/\keyword{elif} clause. The first element of the pair is +the test expression. The second elements is a \class{Stmt} node that +contains the code to execute if the test is true. + +The \method{getChildren()} method of \class{If} returns a flat list of +child nodes. If there are three \keyword{if}/\keyword{elif} clauses +and no \keyword{else} clause, then \method{getChildren()} will return +a list of six elements: the first test expression, the first +\class{Stmt}, the second text expression, etc. + +The following table lists each of the \class{Node} subclasses defined +in \module{compiler.ast} and each of the public attributes available +on their instances. The values of most of the attributes are +themselves \class{Node} instances or sequences of instances. When the +value is something other than an instance, the type is noted in the +comment. The attributes are listed in the order in which they are +returned by \method{getChildren()} and \method{getChildNodes()}. + +\input{asttable} + + +\subsection{Assignment nodes} + +There is a collection of nodes used to represent assignments. Each +assignment statement in the source code becomes a single +\class{Assign} node in the AST. The \member{nodes} attribute is a +list that contains a node for each assignment target. This is +necessary because assignment can be chained, e.g. \code{a = b = 2}. +Each \class{Node} in the list will be one of the following classes: +\class{AssAttr}, \class{AssList}, \class{AssName}, or +\class{AssTuple}. + +Each target assignment node will describe the kind of object being +assigned to: \class{AssName} for a simple name, e.g. \code{a = 1}. +\class{AssAttr} for an attribute assigned, e.g. \code{a.x = 1}. +\class{AssList} and \class{AssTuple} for list and tuple expansion +respectively, e.g. \code{a, b, c = a_tuple}. + +The target assignment nodes also have a \member{flags} attribute that +indicates whether the node is being used for assignment or in a delete +statement. The \class{AssName} is also used to represent a delete +statement, e.g. \class{del x}. + +When an expression contains several attribute references, an +assignment or delete statement will contain only one \class{AssAttr} +node -- for the final attribute reference. The other attribute +references will be represented as \class{Getattr} nodes in the +\member{expr} attribute of the \class{AssAttr} instance. + +\subsection{Examples} + +This section shows several simple examples of ASTs for Python source +code. The examples demonstrate how to use the \function{parse()} +function, what the repr of an AST looks like, and how to access +attributes of an AST node. + +The first module defines a single function. Assume it is stored in +\file{/tmp/doublelib.py}. + +\begin{verbatim} +"""This is an example module. + +This is the docstring. +""" + +def double(x): + "Return twice the argument" + return x * 2 +\end{verbatim} + +In the interactive interpreter session below, I have reformatted the +long AST reprs for readability. The AST reprs use unqualified class +names. If you want to create an instance from a repr, you must import +the class names from the \module{compiler.ast} module. + +\begin{verbatim} +>>> import compiler +>>> mod = compiler.parseFile("/tmp/doublelib.py") +>>> mod +Module('This is an example module.\n\nThis is the docstring.\n', + Stmt([Function('double', ['x'], [], 0, 'Return twice the argument', + Stmt([Return(Mul((Name('x'), Const(2))))]))])) +>>> from compiler.ast import * +>>> Module('This is an example module.\n\nThis is the docstring.\n', +... Stmt([Function('double', ['x'], [], 0, 'Return twice the argument', +... Stmt([Return(Mul((Name('x'), Const(2))))]))])) +Module('This is an example module.\n\nThis is the docstring.\n', + Stmt([Function('double', ['x'], [], 0, 'Return twice the argument', + Stmt([Return(Mul((Name('x'), Const(2))))]))])) +>>> mod.doc +'This is an example module.\n\nThis is the docstring.\n' +>>> for node in mod.node.nodes: +... print node +... +Function('double', ['x'], [], 0, 'Return twice the argument', + Stmt([Return(Mul((Name('x'), Const(2))))])) +>>> func = mod.node.nodes[0] +>>> func.code +Stmt([Return(Mul((Name('x'), Const(2))))]) +\end{verbatim} + +\section{Using Visitors to Walk ASTs} + +\declaremodule{}{compiler.visitor} + +The visitor pattern is ... The \refmodule{compiler} package uses a +variant on the visitor pattern that takes advantage of Python's +introspection features to elminiate the need for much of the visitor's +infrastructure. + +The classes being visited do not need to be programmed to accept +visitors. The visitor need only define visit methods for classes it +is specifically interested in; a default visit method can handle the +rest. + +XXX The magic \method{visit()} method for visitors. + +\begin{funcdesc}{walk}{tree, visitor\optional{, verbose}} +\end{funcdesc} + +\begin{classdesc}{ASTVisitor}{} + +The \class{ASTVisitor} is responsible for walking over the tree in the +correct order. A walk begins with a call to \method{preorder()}. For +each node, it checks the \var{visitor} argument to \method{preorder()} +for a method named `visitNodeType,' where NodeType is the name of the +node's class, e.g. for a \class{While} node a \method{visitWhile()} +would be called. If the method exists, it is called with the node as +its first argument. + +The visitor method for a particular node type can control how child +nodes are visited during the walk. The \class{ASTVisitor} modifies +the visitor argument by adding a visit method to the visitor; this +method can be used to visit a particular child node. If no visitor is +found for a particular node type, the \method{default()} method is +called. +\end{classdesc} + +\class{ASTVisitor} objects have the following methods: + +XXX describe extra arguments + +\begin{methoddesc}{default}{node\optional{, \moreargs}} +\end{methoddesc} + +\begin{methoddesc}{dispatch}{node\optional{, \moreargs}} +\end{methoddesc} + +\begin{methoddesc}{preorder}{tree, visitor} +\end{methoddesc} + + +\section{Bytecode Generation} + +The code generator is a visitor that emits bytecodes. Each visit method +can call the \method{emit()} method to emit a new bytecode. The basic +code generator is specialized for modules, classes, and functions. An +assembler converts that emitted instructions to the low-level bytecode +format. It handles things like generator of constant lists of code +objects and calculation of jump offsets. |