summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFred Drake <fdrake@acm.org>2001-09-27 20:06:07 (GMT)
committerFred Drake <fdrake@acm.org>2001-09-27 20:06:07 (GMT)
commite2f9917f9f2c573f84605d75d70ef4fb3d54a740 (patch)
tree19039f1764ba5919de36782080db88e4118b0c6c
parent876389e5d8c752a4e9088bd6b02863679b4a5047 (diff)
downloadcpython-e2f9917f9f2c573f84605d75d70ef4fb3d54a740.zip
cpython-e2f9917f9f2c573f84605d75d70ef4fb3d54a740.tar.gz
cpython-e2f9917f9f2c573f84605d75d70ef4fb3d54a740.tar.bz2
Migrate the compiler documentation from the Tools/compiler/doc/ directory.
Changes made to make it work in the new location.
-rw-r--r--Doc/lib/asttable.tex253
-rw-r--r--Doc/lib/compiler.tex336
2 files changed, 589 insertions, 0 deletions
diff --git a/Doc/lib/asttable.tex b/Doc/lib/asttable.tex
new file mode 100644
index 0000000..7f6ba9f
--- /dev/null
+++ b/Doc/lib/asttable.tex
@@ -0,0 +1,253 @@
+\begin{longtableiii}{lll}{class}{Node type}{Attribute}{Value}
+
+\lineiii{Add}{\member{left}}{left operand}
+\lineiii{}{\member{right}}{right operand}
+\hline
+
+\lineiii{And}{\member{nodes}}{list of operands}
+\hline
+
+\lineiii{AssAttr}{}{\emph{attribute as target of assignment}}
+\lineiii{}{\member{expr}}{expression on the left-hand side of the dot}
+\lineiii{}{\member{attrname}}{the attribute name, a string}
+\lineiii{}{\member{flags}}{XXX}
+\hline
+
+\lineiii{AssList}{\member{nodes}}{list of list elements being assigned to}
+\hline
+
+\lineiii{AssName}{\member{name}}{name being assigned to}
+\lineiii{}{\member{flags}}{XXX}
+\hline
+
+\lineiii{AssTuple}{\member{nodes}}{list of tuple elements being assigned to}
+\hline
+
+\lineiii{Assert}{\member{test}}{the expression to be tested}
+\lineiii{}{\member{fail}}{the value of the \exception{AssertionError}}
+\hline
+
+\lineiii{Assign}{\member{nodes}}{a list of assignment targets, one per equal sign}
+\lineiii{}{\member{expr}}{the value being assigned}
+\hline
+
+\lineiii{AugAssign}{\member{node}}{}
+\lineiii{}{\member{op}}{}
+\lineiii{}{\member{expr}}{}
+\hline
+
+\lineiii{Backquote}{\member{expr}}{}
+\hline
+
+\lineiii{Bitand}{\member{nodes}}{}
+\hline
+
+\lineiii{Bitor}{\member{nodes}}{}
+\hline
+
+\lineiii{Bitxor}{\member{nodes}}{}
+\hline
+
+\lineiii{Break}{}{}
+\hline
+
+\lineiii{CallFunc}{\member{node}}{expression for the callee}
+\lineiii{}{\member{args}}{a list of arguments}
+\lineiii{}{\member{star_args}}{the extended *-arg value}
+\lineiii{}{\member{dstar_args}}{the extended **-arg value}
+\hline
+
+\lineiii{Class}{\member{name}}{the name of the class, a string}
+\lineiii{}{\member{bases}}{a list of base classes}
+\lineiii{}{\member{doc}}{doc string, a string or \code{None}}
+\lineiii{}{\member{code}}{the body of the class statement}
+\hline
+
+\lineiii{Compare}{\member{expr}}{}
+\lineiii{}{\member{ops}}{}
+\hline
+
+\lineiii{Const}{\member{value}}{}
+\hline
+
+\lineiii{Continue}{}{}
+\hline
+
+\lineiii{Dict}{\member{items}}{}
+\hline
+
+\lineiii{Discard}{\member{expr}}{}
+\hline
+
+\lineiii{Div}{\member{left}}{}
+\lineiii{}{\member{right}}{}
+\hline
+
+\lineiii{Ellipsis}{}{}
+\hline
+
+\lineiii{Exec}{\member{expr}}{}
+\lineiii{}{\member{locals}}{}
+\lineiii{}{\member{globals}}{}
+\hline
+
+\lineiii{For}{\member{assign}}{}
+\lineiii{}{\member{list}}{}
+\lineiii{}{\member{body}}{}
+\lineiii{}{\member{else_}}{}
+\hline
+
+\lineiii{From}{\member{modname}}{}
+\lineiii{}{\member{names}}{}
+\hline
+
+\lineiii{Function}{\member{name}}{name used in def, a string}
+\lineiii{}{\member{argnames}}{list of argument names, as strings}
+\lineiii{}{\member{defaults}}{list of default values}
+\lineiii{}{\member{flags}}{xxx}
+\lineiii{}{\member{doc}}{doc string, a string or \code{None}}
+\lineiii{}{\member{code}}{the body of the function}
+\hline
+
+\lineiii{Getattr}{\member{expr}}{}
+\lineiii{}{\member{attrname}}{}
+\hline
+
+\lineiii{Global}{\member{names}}{}
+\hline
+
+\lineiii{If}{\member{tests}}{}
+\lineiii{}{\member{else_}}{}
+\hline
+
+\lineiii{Import}{\member{names}}{}
+\hline
+
+\lineiii{Invert}{\member{expr}}{}
+\hline
+
+\lineiii{Keyword}{\member{name}}{}
+\lineiii{}{\member{expr}}{}
+\hline
+
+\lineiii{Lambda}{\member{argnames}}{}
+\lineiii{}{\member{defaults}}{}
+\lineiii{}{\member{flags}}{}
+\lineiii{}{\member{code}}{}
+\hline
+
+\lineiii{LeftShift}{\member{left}}{}
+\lineiii{}{\member{right}}{}
+\hline
+
+\lineiii{List}{\member{nodes}}{}
+\hline
+
+\lineiii{ListComp}{\member{expr}}{}
+\lineiii{}{\member{quals}}{}
+\hline
+
+\lineiii{ListCompFor}{\member{assign}}{}
+\lineiii{}{\member{list}}{}
+\lineiii{}{\member{ifs}}{}
+\hline
+
+\lineiii{ListCompIf}{\member{test}}{}
+\hline
+
+\lineiii{Mod}{\member{left}}{}
+\lineiii{}{\member{right}}{}
+\hline
+
+\lineiii{Module}{\member{doc}}{doc string, a string or \code{None}}
+\lineiii{}{\member{node}}{body of the module, a \class{Stmt}}
+\hline
+
+\lineiii{Mul}{\member{left}}{}
+\lineiii{}{\member{right}}{}
+\hline
+
+\lineiii{Name}{\member{name}}{}
+\hline
+
+\lineiii{Not}{\member{expr}}{}
+\hline
+
+\lineiii{Or}{\member{nodes}}{}
+\hline
+
+\lineiii{Pass}{}{}
+\hline
+
+\lineiii{Power}{\member{left}}{}
+\lineiii{}{\member{right}}{}
+\hline
+
+\lineiii{Print}{\member{nodes}}{}
+\lineiii{}{\member{dest}}{}
+\hline
+
+\lineiii{Printnl}{\member{nodes}}{}
+\lineiii{}{\member{dest}}{}
+\hline
+
+\lineiii{Raise}{\member{expr1}}{}
+\lineiii{}{\member{expr2}}{}
+\lineiii{}{\member{expr3}}{}
+\hline
+
+\lineiii{Return}{\member{value}}{}
+\hline
+
+\lineiii{RightShift}{\member{left}}{}
+\lineiii{}{\member{right}}{}
+\hline
+
+\lineiii{Slice}{\member{expr}}{}
+\lineiii{}{\member{flags}}{}
+\lineiii{}{\member{lower}}{}
+\lineiii{}{\member{upper}}{}
+\hline
+
+\lineiii{Sliceobj}{\member{nodes}}{list of statements}
+\hline
+
+\lineiii{Stmt}{\member{nodes}}{}
+\hline
+
+\lineiii{Sub}{\member{left}}{}
+\lineiii{}{\member{right}}{}
+\hline
+
+\lineiii{Subscript}{\member{expr}}{}
+\lineiii{}{\member{flags}}{}
+\lineiii{}{\member{subs}}{}
+\hline
+
+\lineiii{TryExcept}{\member{body}}{}
+\lineiii{}{\member{handlers}}{}
+\lineiii{}{\member{else_}}{}
+\hline
+
+\lineiii{TryFinally}{\member{body}}{}
+\lineiii{}{\member{final}}{}
+\hline
+
+\lineiii{Tuple}{\member{nodes}}{}
+\hline
+
+\lineiii{UnaryAdd}{\member{expr}}{}
+\hline
+
+\lineiii{UnarySub}{\member{expr}}{}
+\hline
+
+\lineiii{While}{\member{test}}{}
+\lineiii{}{\member{body}}{}
+\lineiii{}{\member{else_}}{}
+\hline
+
+\lineiii{Yield}{\member{value}}{}
+\hline
+
+\end{longtableiii}
diff --git a/Doc/lib/compiler.tex b/Doc/lib/compiler.tex
new file mode 100644
index 0000000..280ee0f
--- /dev/null
+++ b/Doc/lib/compiler.tex
@@ -0,0 +1,336 @@
+\chapter{Python compiler package \label{compiler}}
+
+\sectionauthor{Jeremy Hylton}{jeremy@zope.com}
+
+
+The Python compiler package is a tool for analyzing Python source code
+and generating Python bytecode. The compiler contains libraries to
+generate an abstract syntax tree from Python source code and to
+generate Python bytecode from the tree.
+
+The \refmodule{compiler} package is a Python source to bytecode
+translator written in Python. It uses the built-in parser and
+standard \refmodule{parser} module to generated a concrete syntax
+tree. This tree is used to generate an abstract syntax tree (AST) and
+then Python bytecode.
+
+The full functionality of the package duplicates the builtin compiler
+provided with the Python interpreter. It is intended to match its
+behavior almost exactly. Why implement another compiler that does the
+same thing? The package is useful for a variety of purposes. It can
+be modified more easily than the builtin compiler. The AST it
+generates is useful for analyzing Python source code.
+
+This chapter explains how the various components of the
+\refmodule{compiler} package work. It blends reference material with
+a tutorial.
+
+The following modules are part of the \refmodule{compiler} package:
+
+\localmoduletable
+
+
+\subsection{The basic interface}
+
+\declaremodule{}{compiler}
+
+The top-level of the package defines four functions. If you import
+\module{compiler}, you will get these functions and a collection of
+modules contained in the package.
+
+\begin{funcdesc}{parse}{buf}
+Returns an abstract syntax tree for the Python source code in \var{buf}.
+The function raises SyntaxError if there is an error in the source
+code. The return value is a \class{compiler.ast.Module} instance that
+contains the tree.
+\end{funcdesc}
+
+\begin{funcdesc}{parseFile}{path}
+Return an abstract syntax tree for the Python source code in the file
+specified by \var{path}. It is equivalent to
+\code{parse(open(\var{path}).read())}.
+\end{funcdesc}
+
+\begin{funcdesc}{walk}{ast, visitor\optional{, verbose}}
+Do a pre-order walk over the abstract syntax tree \var{ast}. Call the
+appropriate method on the \var{visitor} instance for each node
+encountered.
+\end{funcdesc}
+
+\begin{funcdesc}{compile}{path}
+Compile the file \var{path} and generate the corresponding \file{.pyc}
+file.
+\end{funcdesc}
+
+The \module{compiler} package contains the following modules:
+\refmodule[compiler.ast]{ast}, \module{consts}, \module{future},
+\module{misc}, \module{pyassem}, \module{pycodegen}, \module{symbols},
+\module{transformer}, and \refmodule[compiler.visitor]{visitor}.
+
+\subsection{Limitations}
+
+There are some problems with the error checking of the compiler
+package. The interpreter detects syntax errors in two distinct
+phases. One set of errors is detected by the interpreter's parser,
+the other set by the compiler. The compiler package relies on the
+interpreter's parser, so it get the first phases of error checking for
+free. It implements the second phase itself, and that implement is
+incomplete. For example, the compiler package does not raise an error
+if a name appears more than once in an argument list:
+\code{def f(x, x): ...}
+
+A future version of the compiler should fix these problems.
+
+\section{Python Abstract Syntax}
+
+The \module{compiler.ast} module defines an abstract syntax for
+Python. In the abstract syntax tree, each node represents a syntactic
+construct. The root of the tree is \class{Module} object.
+
+The abstract syntax offers a higher level interface to parsed Python
+source code. The \ulink{\module{parser}}
+{http://www.python.org/doc/current/lib/module-parser.html}
+module and the compiler written in C for the Python interpreter use a
+concrete syntax tree. The concrete syntax is tied closely to the
+grammar description used for the Python parser. Instead of a single
+node for a construct, there are often several levels of nested nodes
+that are introduced by Python's precedence rules.
+
+The abstract syntax tree is created by the
+\module{compiler.transformer} module. The transformer relies on the
+builtin Python parser to generate a concrete syntax tree. It
+generates an abstract syntax tree from the concrete tree.
+
+The \module{transformer} module was created by Greg
+Stein\index{Stein, Greg} and Bill Tutt\index{Tutt, Bill} for an
+experimental Python-to-C compiler. The current version contains a
+number of modifications and improvements, but the basic form of the
+abstract syntax and of the transformer are due to Stein and Tutt.
+
+\subsection{AST Nodes}
+
+\declaremodule{}{compiler.ast}
+
+The \module{compiler.ast} module is generated from a text file that
+describes each node type and its elements. Each node type is
+represented as a class that inherits from the abstract base class
+\class{compiler.ast.Node} and defines a set of named attributes for
+child nodes.
+
+\begin{classdesc}{Node}{}
+
+ The \class{Node} instances are created automatically by the parser
+ generator. The recommended interface for specific \class{Node}
+ instances is to use the public attributes to access child nodes. A
+ public attribute may be bound to a single node or to a sequence of
+ nodes, depending on the \class{Node} type. For example, the
+ \member{bases} attribute of the \class{Class} node, is bound to a
+ list of base class nodes, and the \member{doc} attribute is bound to
+ a single node.
+
+ Each \class{Node} instance has a \member{lineno} attribute which may
+ be \code{None}. XXX Not sure what the rules are for which nodes
+ will have a useful lineno.
+\end{classdesc}
+
+All \class{Node} objects offer the following methods:
+
+\begin{methoddesc}{getChildren}{}
+ Returns a flattened list of the child nodes and objects in the
+ order they occur. Specifically, the order of the nodes is the
+ order in which they appear in the Python grammar. Not all of the
+ children are \class{Node} instances. The names of functions and
+ classes, for example, are plain strings.
+\end{methoddesc}
+
+\begin{methoddesc}{getChildNodes}{}
+ Returns a flattened list of the child nodes in the order they
+ occur. This method is like \method{getChildren()}, except that it
+ only returns those children that are \class{Node} instances.
+\end{methoddesc}
+
+Two examples illustrate the general structure of \class{Node}
+classes. The \keyword{while} statement is defined by the following
+grammar production:
+
+\begin{verbatim}
+while_stmt: "while" expression ":" suite
+ ["else" ":" suite]
+\end{verbatim}
+
+The \class{While} node has three attributes: \member{test},
+\member{body}, and \member{else_}. (If the natural name for an
+attribute is also a Python reserved word, it can't be used as an
+attribute name. An underscore is appended to the word to make it a
+legal identifier, hence \member{else_} instead of \keyword{else}.)
+
+The \keyword{if} statement is more complicated because it can include
+several tests.
+
+\begin{verbatim}
+if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
+\end{verbatim}
+
+The \class{If} node only defines two attributes: \member{tests} and
+\member{else_}. The \member{tests} attribute is a sequence of test
+expression, consequent body pairs. There is one pair for each
+\keyword{if}/\keyword{elif} clause. The first element of the pair is
+the test expression. The second elements is a \class{Stmt} node that
+contains the code to execute if the test is true.
+
+The \method{getChildren()} method of \class{If} returns a flat list of
+child nodes. If there are three \keyword{if}/\keyword{elif} clauses
+and no \keyword{else} clause, then \method{getChildren()} will return
+a list of six elements: the first test expression, the first
+\class{Stmt}, the second text expression, etc.
+
+The following table lists each of the \class{Node} subclasses defined
+in \module{compiler.ast} and each of the public attributes available
+on their instances. The values of most of the attributes are
+themselves \class{Node} instances or sequences of instances. When the
+value is something other than an instance, the type is noted in the
+comment. The attributes are listed in the order in which they are
+returned by \method{getChildren()} and \method{getChildNodes()}.
+
+\input{asttable}
+
+
+\subsection{Assignment nodes}
+
+There is a collection of nodes used to represent assignments. Each
+assignment statement in the source code becomes a single
+\class{Assign} node in the AST. The \member{nodes} attribute is a
+list that contains a node for each assignment target. This is
+necessary because assignment can be chained, e.g. \code{a = b = 2}.
+Each \class{Node} in the list will be one of the following classes:
+\class{AssAttr}, \class{AssList}, \class{AssName}, or
+\class{AssTuple}.
+
+Each target assignment node will describe the kind of object being
+assigned to: \class{AssName} for a simple name, e.g. \code{a = 1}.
+\class{AssAttr} for an attribute assigned, e.g. \code{a.x = 1}.
+\class{AssList} and \class{AssTuple} for list and tuple expansion
+respectively, e.g. \code{a, b, c = a_tuple}.
+
+The target assignment nodes also have a \member{flags} attribute that
+indicates whether the node is being used for assignment or in a delete
+statement. The \class{AssName} is also used to represent a delete
+statement, e.g. \class{del x}.
+
+When an expression contains several attribute references, an
+assignment or delete statement will contain only one \class{AssAttr}
+node -- for the final attribute reference. The other attribute
+references will be represented as \class{Getattr} nodes in the
+\member{expr} attribute of the \class{AssAttr} instance.
+
+\subsection{Examples}
+
+This section shows several simple examples of ASTs for Python source
+code. The examples demonstrate how to use the \function{parse()}
+function, what the repr of an AST looks like, and how to access
+attributes of an AST node.
+
+The first module defines a single function. Assume it is stored in
+\file{/tmp/doublelib.py}.
+
+\begin{verbatim}
+"""This is an example module.
+
+This is the docstring.
+"""
+
+def double(x):
+ "Return twice the argument"
+ return x * 2
+\end{verbatim}
+
+In the interactive interpreter session below, I have reformatted the
+long AST reprs for readability. The AST reprs use unqualified class
+names. If you want to create an instance from a repr, you must import
+the class names from the \module{compiler.ast} module.
+
+\begin{verbatim}
+>>> import compiler
+>>> mod = compiler.parseFile("/tmp/doublelib.py")
+>>> mod
+Module('This is an example module.\n\nThis is the docstring.\n',
+ Stmt([Function('double', ['x'], [], 0, 'Return twice the argument',
+ Stmt([Return(Mul((Name('x'), Const(2))))]))]))
+>>> from compiler.ast import *
+>>> Module('This is an example module.\n\nThis is the docstring.\n',
+... Stmt([Function('double', ['x'], [], 0, 'Return twice the argument',
+... Stmt([Return(Mul((Name('x'), Const(2))))]))]))
+Module('This is an example module.\n\nThis is the docstring.\n',
+ Stmt([Function('double', ['x'], [], 0, 'Return twice the argument',
+ Stmt([Return(Mul((Name('x'), Const(2))))]))]))
+>>> mod.doc
+'This is an example module.\n\nThis is the docstring.\n'
+>>> for node in mod.node.nodes:
+... print node
+...
+Function('double', ['x'], [], 0, 'Return twice the argument',
+ Stmt([Return(Mul((Name('x'), Const(2))))]))
+>>> func = mod.node.nodes[0]
+>>> func.code
+Stmt([Return(Mul((Name('x'), Const(2))))])
+\end{verbatim}
+
+\section{Using Visitors to Walk ASTs}
+
+\declaremodule{}{compiler.visitor}
+
+The visitor pattern is ... The \refmodule{compiler} package uses a
+variant on the visitor pattern that takes advantage of Python's
+introspection features to elminiate the need for much of the visitor's
+infrastructure.
+
+The classes being visited do not need to be programmed to accept
+visitors. The visitor need only define visit methods for classes it
+is specifically interested in; a default visit method can handle the
+rest.
+
+XXX The magic \method{visit()} method for visitors.
+
+\begin{funcdesc}{walk}{tree, visitor\optional{, verbose}}
+\end{funcdesc}
+
+\begin{classdesc}{ASTVisitor}{}
+
+The \class{ASTVisitor} is responsible for walking over the tree in the
+correct order. A walk begins with a call to \method{preorder()}. For
+each node, it checks the \var{visitor} argument to \method{preorder()}
+for a method named `visitNodeType,' where NodeType is the name of the
+node's class, e.g. for a \class{While} node a \method{visitWhile()}
+would be called. If the method exists, it is called with the node as
+its first argument.
+
+The visitor method for a particular node type can control how child
+nodes are visited during the walk. The \class{ASTVisitor} modifies
+the visitor argument by adding a visit method to the visitor; this
+method can be used to visit a particular child node. If no visitor is
+found for a particular node type, the \method{default()} method is
+called.
+\end{classdesc}
+
+\class{ASTVisitor} objects have the following methods:
+
+XXX describe extra arguments
+
+\begin{methoddesc}{default}{node\optional{, \moreargs}}
+\end{methoddesc}
+
+\begin{methoddesc}{dispatch}{node\optional{, \moreargs}}
+\end{methoddesc}
+
+\begin{methoddesc}{preorder}{tree, visitor}
+\end{methoddesc}
+
+
+\section{Bytecode Generation}
+
+The code generator is a visitor that emits bytecodes. Each visit method
+can call the \method{emit()} method to emit a new bytecode. The basic
+code generator is specialized for modules, classes, and functions. An
+assembler converts that emitted instructions to the low-level bytecode
+format. It handles things like generator of constant lists of code
+objects and calculation of jump offsets.