summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Tools/compiler/doc/compiler.tex166
1 files changed, 81 insertions, 85 deletions
diff --git a/Tools/compiler/doc/compiler.tex b/Tools/compiler/doc/compiler.tex
index f39b734..824ff08 100644
--- a/Tools/compiler/doc/compiler.tex
+++ b/Tools/compiler/doc/compiler.tex
@@ -5,7 +5,7 @@
%
% http://www.python.org/doc/current/doc/doc.html
-\documentclass{manual}
+\documentclass{howto}
\title{Python compiler package}
@@ -15,7 +15,7 @@
% the rest is at your discretion.
\authoraddress{
PythonLabs \\
- Zope Corp. \\
+ Zope Corporation \\
Email: \email{jeremy@zope.com}
}
@@ -36,13 +36,6 @@
\maketitle
-% This makes the contents more accessible from the front page of the HTML.
-\ifhtml
-\chapter*{Front Matter\label{front}}
-\fi
-
-%\input{copyright}
-
\begin{abstract}
\noindent
@@ -55,15 +48,19 @@ generate Python bytecode from the tree.
\tableofcontents
-\chapter{Introduction\label{Introduction}}
+
+\section{Introduction\label{Introduction}}
XXX Need basic intro
XXX what are the major advantages... the abstract syntax is much
closer to the python source...
+
\section{The basic interface}
+\declaremodule{}{compiler}
+
The top-level of the package defines four functions.
\begin{funcdesc}{parse}{buf}
@@ -79,20 +76,22 @@ specified by \var{path}. It is equivalent to
\code{parse(open(\var{path}).read())}.
\end{funcdesc}
-\begin{funcdesc}{walk}{ast, visitor, \optional{verbose=None}}
+\begin{funcdesc}{walk}{ast, visitor\optional{, verbose}}
Do a pre-order walk over the abstract syntax tree \var{ast}. Call the
appropriate method on the \var{visitor} instance for each node
-encountered.
+encountered.
\end{funcdesc}
-\begin{funcdesc}{compile}{filename}
-Compile the file \var{filename} and generated \var{filename}.pyc.
+\begin{funcdesc}{compile}{path}
+Compile the file \var{path} and generate the corresponding \file{.pyc}
+file.
\end{funcdesc}
The \module{compiler} package contains the following modules:
-\module{ast}, \module{consts}, \module{future}, \module{misc},
-\module{pyassem}, \module{pycodegen}, \module{symbols},
-\module{transformer}, and \module{visitor}.
+\refmodule[compiler.ast]{ast}, \module{consts}, \module{future},
+\module{misc}, \module{pyassem}, \module{pycodegen}, \module{symbols},
+\module{transformer}, and \refmodule[compiler.visitor]{visitor}.
+
\section{Limitations}
@@ -106,38 +105,43 @@ incomplete. For example, the compiler package does not raise an error
if a name appears more than once in an argument list:
\code{def f(x, x): ...}
-\chapter{Python Abstract Syntax}
-\section{Introduction}
+\section{Python Abstract Syntax}
The \module{compiler.ast} module defines an abstract syntax for
Python. In the abstract syntax tree, each node represents a syntactic
construct. The root of the tree is a \class{Module} object.
The abstract syntax offers a higher level interface to parsed Python
-source code. The \module{parser} module and the compiler written in C
-for the Python interpreter use a concrete syntax tree. The concrete
-syntax is tied closely to the grammar description used for the Python
-parser. Instead of a single node for a construct, there are often
-several levels of nested nodes that are introduced by Python's
-precedence rules.
+source code. The \ulink{\module{parser}}
+{http://www.python.org/doc/current/lib/module-parser.html}
+module and the compiler written in C for the Python interpreter use a
+concrete syntax tree. The concrete syntax is tied closely to the
+grammar description used for the Python parser. Instead of a single
+node for a construct, there are often several levels of nested nodes
+that are introduced by Python's precedence rules.
The abstract syntax tree is created by the
\module{compiler.transformer} module. The transformer relies on the
builtin Python parser to generate a concrete syntax tree. It
generates an abstract syntax tree from the concrete tree.
-The \module{transformer} module was created by Greg Stein and Bill
-Tutt for the Python-to-C compiler. The current version contains a
+The \module{transformer} module was created by Greg
+Stein\index{Stein, Greg} and Bill Tutt\index{Tutt, Bill} for an
+experimental Python-to-C compiler. The current version contains a
number of modifications and improvements, but the basic form of the
abstract syntax and of the transformer are due to Stein and Tutt.
+
\section{AST Nodes}
-The \module{ast} module is generated from a text file that describes
-each node type and its elements. Each node type is represented as a
-class that inherits from the abstract base class \class{ast.Node} and
-defines a set of named attributes for child nodes.
+\declaremodule{}{compiler.ast}
+
+The \module{compiler.ast} module is generated from a text file that
+describes each node type and its elements. Each node type is
+represented as a class that inherits from the abstract base class
+\class{compiler.ast.Node} and defines a set of named attributes for
+child nodes.
\begin{classdesc}{Node}{}
@@ -153,26 +157,27 @@ defines a set of named attributes for child nodes.
Each \class{Node} instance has a \member{lineno} attribute which may
be \code{None}. XXX Not sure what the rules are for which nodes
will have a useful lineno.
+\end{classdesc}
- \begin{methoddesc}{getChildren}{}
- Returns a flattened list of the child nodes and objects in the
- order they occur. Specifically, the order of the nodes is the
- order in which they appear in the Python grammar. Not all of the
- children are \class{Node} instances. The names of functions and
- classes, for example, are plain strings.
- \end{methoddesc}
+All \class{Node} objects offer the following methods:
- \begin{methoddesc}{getChildNodes}{}
- Returns a flattened list of the child nodes in the order they
- occur. This method is like \method{getChildNodes()}, except that it
- only returns those children that are \class{Node} instances.
- \end{methoddesc}
+\begin{methoddesc}{getChildren}{}
+ Returns a flattened list of the child nodes and objects in the
+ order they occur. Specifically, the order of the nodes is the
+ order in which they appear in the Python grammar. Not all of the
+ children are \class{Node} instances. The names of functions and
+ classes, for example, are plain strings.
+\end{methoddesc}
-\end{classdesc}
+\begin{methoddesc}{getChildNodes}{}
+ Returns a flattened list of the child nodes in the order they
+ occur. This method is like \method{getChildren()}, except that it
+ only returns those children that are \class{Node} instances.
+\end{methoddesc}
Two examples illustrate the general structure of \class{Node}
-classes. The while statement is defined by the following grammar
-production:
+classes. The \keyword{while} statement is defined by the following
+grammar production:
\begin{verbatim}
while_stmt: "while" expression ":" suite
@@ -182,11 +187,11 @@ while_stmt: "while" expression ":" suite
The \class{While} node has three attributes: \member{test},
\member{body}, and \member{else_}. (If the natural name for an
attribute is also a Python reserved word, it can't be used as an
-attribute name. An underscore is appended to the word to make it
-legal, hence \code{else_} instead of \code{else}.)
+attribute name. An underscore is appended to the word to make it a
+legal identifier, hence \member{else_} instead of \keyword{else}.)
-The if statement is more complicated because it can include several
-tests.
+The \keyword{if} statement is more complicated because it can include
+several tests.
\begin{verbatim}
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
@@ -194,16 +199,16 @@ if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
The \class{If} node only defines two attributes: \member{tests} and
\member{else_}. The \member{tests} attribute is a sequence of test
-expression, consequent body pairs. There is one pair of each if/elif
-clause. The first element of the pair is the test expression. The
-second elements is a \class{Stmt} node that contains the code to
-execute if the test is true.
+expression, consequent body pairs. There is one pair for each
+\keyword{if}/\keyword{elif} clause. The first element of the pair is
+the test expression. The second element is a \class{Stmt} node that
+contains the code to execute if the test is true.
The \method{getChildren()} method of \class{If} returns a flat list of
-child nodes. If there are three if/elif clauses and no else clause,
-then \method{getChildren()} will return a list of six elements: the
-first test expression, the first \class{Stmt}, the second text
-expression, etc.
+child nodes. If there are three \keyword{if}/\keyword{elif} clauses
+and no \keyword{else} clause, then \method{getChildren()} will return
+a list of six elements: the first test expression, the first
+\class{Stmt}, the second test expression, etc.
The following table lists each of the \class{Node} subclasses defined
in \module{compiler.ast} and each of the public attributes available
@@ -215,6 +220,7 @@ returned by \method{getChildren()} and \method{getChildNodes()}.
\input{asttable}
+
\section{Assignment nodes}
There is a collection of nodes used to represent assignments. Each
@@ -229,9 +235,12 @@ Each \class{Node} in the list will be one of the following classes:
XXX Explain what the AssXXX nodes are for. Mention \code{a.b.c = 2}
as an example. Explain what the flags are for.
-\chapter{Using Visitors to Walk ASTs}
-The visitor pattern is ... The \module{compiler} package uses a
+\section{Using Visitors to Walk ASTs}
+
+\declaremodule{}{compiler.visitor}
+
+The visitor pattern is ... The \refmodule{compiler} package uses a
variant on the visitor pattern that takes advantage of Python's
introspection features to eliminate the need for much of the visitor's
infrastructure.
@@ -243,6 +252,9 @@ rest.
XXX The magic \method{visit()} method for visitors.
+\begin{funcdesc}{walk}{tree, visitor\optional{, verbose}}
+\end{funcdesc}
+
\begin{classdesc}{ASTVisitor}{}
The \class{ASTVisitor} is responsible for walking over the tree in the
@@ -259,48 +271,32 @@ the visitor argument by adding a visit method to the visitor; this
method can be used to visit a particular child node. If no visitor is
found for a particular node type, the \method{default()} method is
called.
+\end{classdesc}
+
+\class{ASTVisitor} objects have the following methods:
XXX describe extra arguments
-\begin{methoddesc}{default}{node\optional{, *args}}
+\begin{methoddesc}{default}{node\optional{, \moreargs}}
\end{methoddesc}
-\begin{methoddesc}{dispatch}{node\optional{, *args}}
+\begin{methoddesc}{dispatch}{node\optional{, \moreargs}}
\end{methoddesc}
\begin{methoddesc}{preorder}{tree, visitor}
\end{methoddesc}
-\end{classdesc}
-\begin{funcdesc}{walk}{tree, visitor\optional{, verbose=None}}
-\end{funcdesc}
-
-\chapter{Bytecode Generation}
+\section{Bytecode Generation}
-The code generator is a visit that emits bytecodes. Each visit method
+The code generator is a visitor that emits bytecodes. Each visit method
can call the \method{emit()} method to emit a new bytecode. The basic
code generator is specialized for modules, classes, and functions. An
assembler converts the emitted instructions to the low-level bytecode
format. It handles things like generation of constant lists of code
objects and calculation of jump offsets.
-%
-% The ugly "%begin{latexonly}" pseudo-environments are really just to
-% keep LaTeX2HTML quiet during the \renewcommand{} macros; they're
-% not really valuable.
-%
-% If you don't want the Module Index, you can remove all of this up
-% until the second \input line.
-%
-%begin{latexonly}
-\renewcommand{\indexname}{Module Index}
-%end{latexonly}
-\input{mod\jobname.ind} % Module Index
-
-%begin{latexonly}
-\renewcommand{\indexname}{Index}
-%end{latexonly}
-\input{\jobname.ind} % Index
+
+\input{compiler.ind} % Index
\end{document}