From f666917ab76a483447d5da33ebacf57ab385cb10 Mon Sep 17 00:00:00 2001 From: Fred Drake Date: Wed, 6 May 1998 19:52:49 +0000 Subject: The Python Reference Manual. --- Doc/ref.tex | 56 ++++ Doc/ref/ref.tex | 56 ++++ Doc/ref/ref1.tex | 81 +++++ Doc/ref/ref2.tex | 372 +++++++++++++++++++++++ Doc/ref/ref3.tex | 889 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ Doc/ref/ref4.tex | 200 +++++++++++++ Doc/ref/ref5.tex | 759 +++++++++++++++++++++++++++++++++++++++++++++++ Doc/ref/ref6.tex | 512 ++++++++++++++++++++++++++++++++ Doc/ref/ref7.tex | 391 ++++++++++++++++++++++++ Doc/ref/ref8.tex | 105 +++++++ Doc/ref1.tex | 81 +++++ Doc/ref2.tex | 372 +++++++++++++++++++++++ Doc/ref3.tex | 889 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ Doc/ref4.tex | 200 +++++++++++++ Doc/ref5.tex | 759 +++++++++++++++++++++++++++++++++++++++++++++++ Doc/ref6.tex | 512 ++++++++++++++++++++++++++++++++ Doc/ref7.tex | 391 ++++++++++++++++++++++++ Doc/ref8.tex | 105 +++++++ 18 files changed, 6730 insertions(+) create mode 100644 Doc/ref.tex create mode 100644 Doc/ref/ref.tex create mode 100644 Doc/ref/ref1.tex create mode 100644 Doc/ref/ref2.tex create mode 100644 Doc/ref/ref3.tex create mode 100644 Doc/ref/ref4.tex create mode 100644 Doc/ref/ref5.tex create mode 100644 Doc/ref/ref6.tex create mode 100644 Doc/ref/ref7.tex create mode 100644 Doc/ref/ref8.tex create mode 100644 Doc/ref1.tex create mode 100644 Doc/ref2.tex create mode 100644 Doc/ref3.tex create mode 100644 Doc/ref4.tex create mode 100644 Doc/ref5.tex create mode 100644 Doc/ref6.tex create mode 100644 Doc/ref7.tex create mode 100644 Doc/ref8.tex diff --git a/Doc/ref.tex b/Doc/ref.tex new file mode 100644 index 0000000..c741b6d --- /dev/null +++ b/Doc/ref.tex @@ -0,0 +1,56 @@ +\documentclass{manual} + +\title{Python Reference Manual} + +\input{boilerplate} + +\makeindex + +\begin{document} + +\maketitle + +\input{copyright} + +\begin{abstract} + +\noindent +Python is a simple, yet powerful, interpreted programming 
language +that bridges the gap between C and shell programming, and is thus +ideally suited for ``throw-away programming'' and rapid prototyping. +Its syntax is put together from constructs borrowed from a variety of +other languages; most prominent are influences from ABC, C, Modula-3 +and Icon. + +The Python interpreter is easily extended with new functions and data +types implemented in C. Python is also suitable as an extension +language for highly customizable C applications such as editors or +window managers. + +Python is available for various operating systems, amongst which +several flavors of {\UNIX} (including Linux), the Apple Macintosh O.S., +MS-DOS, MS-Windows 3.1, Windows NT, and OS/2. + +This reference manual describes the syntax and ``core semantics'' of +the language. It is terse, but attempts to be exact and complete. +The semantics of non-essential built-in object types and of the +built-in functions and modules are described in the {\em Python +Library Reference}. For an informal introduction to the language, see +the {\em Python Tutorial}. + +\end{abstract} + +\tableofcontents + +\include{ref1} % Introduction +\include{ref2} % Lexical analysis +\include{ref3} % Data model +\include{ref4} % Execution model +\include{ref5} % Expressions and conditions +\include{ref6} % Simple statements +\include{ref7} % Compound statements +\include{ref8} % Top-level components + +\input{ref.ind} + +\end{document} diff --git a/Doc/ref/ref.tex b/Doc/ref/ref.tex new file mode 100644 index 0000000..c741b6d --- /dev/null +++ b/Doc/ref/ref.tex @@ -0,0 +1,56 @@ +\documentclass{manual} + +\title{Python Reference Manual} + +\input{boilerplate} + +\makeindex + +\begin{document} + +\maketitle + +\input{copyright} + +\begin{abstract} + +\noindent +Python is a simple, yet powerful, interpreted programming language +that bridges the gap between C and shell programming, and is thus +ideally suited for ``throw-away programming'' and rapid prototyping. 
+Its syntax is put together from constructs borrowed from a variety of +other languages; most prominent are influences from ABC, C, Modula-3 +and Icon. + +The Python interpreter is easily extended with new functions and data +types implemented in C. Python is also suitable as an extension +language for highly customizable C applications such as editors or +window managers. + +Python is available for various operating systems, amongst which +several flavors of {\UNIX} (including Linux), the Apple Macintosh O.S., +MS-DOS, MS-Windows 3.1, Windows NT, and OS/2. + +This reference manual describes the syntax and ``core semantics'' of +the language. It is terse, but attempts to be exact and complete. +The semantics of non-essential built-in object types and of the +built-in functions and modules are described in the {\em Python +Library Reference}. For an informal introduction to the language, see +the {\em Python Tutorial}. + +\end{abstract} + +\tableofcontents + +\include{ref1} % Introduction +\include{ref2} % Lexical analysis +\include{ref3} % Data model +\include{ref4} % Execution model +\include{ref5} % Expressions and conditions +\include{ref6} % Simple statements +\include{ref7} % Compound statements +\include{ref8} % Top-level components + +\input{ref.ind} + +\end{document} diff --git a/Doc/ref/ref1.tex b/Doc/ref/ref1.tex new file mode 100644 index 0000000..30bfcce --- /dev/null +++ b/Doc/ref/ref1.tex @@ -0,0 +1,81 @@ +\chapter{Introduction} + +This reference manual describes the Python programming language. +It is not intended as a tutorial. + +While I am trying to be as precise as possible, I chose to use English +rather than formal specifications for everything except syntax and +lexical analysis. This should make the document more understandable +to the average reader, but will leave room for ambiguities. 
+Consequently, if you were coming from Mars and tried to re-implement +Python from this document alone, you might have to guess things and in +fact you would probably end up implementing quite a different language. +On the other hand, if you are using +Python and wonder what the precise rules about a particular area of +the language are, you should definitely be able to find them here. + +It is dangerous to add too many implementation details to a language +reference document --- the implementation may change, and other +implementations of the same language may work differently. On the +other hand, there is currently only one Python implementation, and +its particular quirks are sometimes worth being mentioned, especially +where the implementation imposes additional limitations. Therefore, +you'll find short ``implementation notes'' sprinkled throughout the +text. + +Every Python implementation comes with a number of built-in and +standard modules. These are not documented here, but in the separate +{\em Python Library Reference} document. A few built-in modules are +mentioned when they interact in a significant way with the language +definition. + +\section{Notation} + +The descriptions of lexical analysis and syntax use a modified BNF +grammar notation. This uses the following style of definition: +\index{BNF} +\index{grammar} +\index{syntax} +\index{notation} + +\begin{verbatim} +name: lc_letter (lc_letter | "_")* +lc_letter: "a"..."z" +\end{verbatim} + +The first line says that a \verb@name@ is an \verb@lc_letter@ followed by +a sequence of zero or more \verb@lc_letter@s and underscores. An +\verb@lc_letter@ in turn is any of the single characters `a' through `z'. +(This rule is actually adhered to for the names defined in lexical and +grammar rules in this document.) + +Each rule begins with a name (which is the name defined by the rule) +and a colon. 
A vertical bar (\verb@|@) is used to separate +alternatives; it is the least binding operator in this notation. A +star (\verb@*@) means zero or more repetitions of the preceding item; +likewise, a plus (\verb@+@) means one or more repetitions, and a +phrase enclosed in square brackets (\verb@[ ]@) means zero or one +occurrences (in other words, the enclosed phrase is optional). The +\verb@*@ and \verb@+@ operators bind as tightly as possible; +parentheses are used for grouping. Literal strings are enclosed in +quotes. White space is only meaningful to separate tokens. +Rules are normally contained on a single line; rules with many +alternatives may be formatted alternatively with each line after the +first beginning with a vertical bar. + +In lexical definitions (as the example above), two more conventions +are used: Two literal characters separated by three dots mean a choice +of any single character in the given (inclusive) range of \ASCII{} +characters. A phrase between angular brackets (\verb@<...>@) gives an +informal description of the symbol defined; e.g. this could be used +to describe the notion of `control character' if needed. +\index{lexical definitions} +\index{ASCII} + +Even though the notation used is almost the same, there is a big +difference between the meaning of lexical and syntactic definitions: +a lexical definition operates on the individual characters of the +input source, while a syntax definition operates on the stream of +tokens generated by the lexical analysis. All uses of BNF in the next +chapter (``Lexical Analysis'') are lexical definitions; uses in +subsequent chapters are syntactic definitions. diff --git a/Doc/ref/ref2.tex b/Doc/ref/ref2.tex new file mode 100644 index 0000000..b093998 --- /dev/null +++ b/Doc/ref/ref2.tex @@ -0,0 +1,372 @@ +\chapter{Lexical analysis} + +A Python program is read by a {\em parser}. Input to the parser is a +stream of {\em tokens}, generated by the {\em lexical analyzer}. 
This +chapter describes how the lexical analyzer breaks a file into tokens. +\index{lexical analysis} +\index{parser} +\index{token} + +\section{Line structure} + +A Python program is divided into a number of logical lines. The end of +a logical line is represented by the token NEWLINE. Statements cannot +cross logical line boundaries except where NEWLINE is allowed by the +syntax (e.g. between statements in compound statements). +\index{line structure} +\index{logical line} +\index{NEWLINE token} + +\subsection{Comments} + +A comment starts with a hash character (\verb@#@) that is not part of +a string literal, and ends at the end of the physical line. A comment +always signifies the end of the logical line. Comments are ignored by +the syntax. +\index{comment} +\index{logical line} +\index{physical line} +\index{hash character} + +\subsection{Explicit line joining} + +Two or more physical lines may be joined into logical lines using +backslash characters (\verb/\/), as follows: when a physical line ends +in a backslash that is not part of a string literal or comment, it is +joined with the following line, forming a single logical line, deleting the +backslash and the following end-of-line character. For example: +\index{physical line} +\index{line joining} +\index{line continuation} +\index{backslash character} +% +\begin{verbatim} +if 1900 < year < 2100 and 1 <= month <= 12 \ + and 1 <= day <= 31 and 0 <= hour < 24 \ + and 0 <= minute < 60 and 0 <= second < 60: # Looks like a valid date + return 1 +\end{verbatim} + +A line ending in a backslash cannot carry a comment; a backslash does +not continue a comment (but it does continue a string literal, see +below). + +\subsection{Implicit line joining} + +Expressions in parentheses, square brackets or curly braces can be +split over more than one physical line without using backslashes. 
+For example: + +\begin{verbatim} +month_names = ['Januari', 'Februari', 'Maart', # These are the + 'April', 'Mei', 'Juni', # Dutch names + 'Juli', 'Augustus', 'September', # for the months + 'Oktober', 'November', 'December'] # of the year +\end{verbatim} + +Implicitly continued lines can carry comments. The indentation of the +continuation lines is not important. Blank continuation lines are +allowed. + +\subsection{Blank lines} + +A logical line that contains only spaces, tabs, and possibly a +comment, is ignored (i.e., no NEWLINE token is generated), except that +during interactive input of statements, an entirely blank logical line +terminates a multi-line statement. +\index{blank line} + +\subsection{Indentation} + +Leading whitespace (spaces and tabs) at the beginning of a logical +line is used to compute the indentation level of the line, which in +turn is used to determine the grouping of statements. +\index{indentation} +\index{whitespace} +\index{leading whitespace} +\index{space} +\index{tab} +\index{grouping} +\index{statement grouping} + +First, tabs are replaced (from left to right) by one to eight spaces +such that the total number of characters up to there is a multiple of +eight (this is intended to be the same rule as used by {\UNIX}). The +total number of spaces preceding the first non-blank character then +determines the line's indentation. Indentation cannot be split over +multiple physical lines using backslashes. + +The indentation levels of consecutive lines are used to generate +INDENT and DEDENT tokens, using a stack, as follows. +\index{INDENT token} +\index{DEDENT token} + +Before the first line of the file is read, a single zero is pushed on +the stack; this will never be popped off again. The numbers pushed on +the stack will always be strictly increasing from bottom to top. At +the beginning of each logical line, the line's indentation level is +compared to the top of the stack. If it is equal, nothing happens. 
+If it is larger, it is pushed on the stack, and one INDENT token is +generated. If it is smaller, it {\em must} be one of the numbers +occurring on the stack; all numbers on the stack that are larger are +popped off, and for each number popped off a DEDENT token is +generated. At the end of the file, a DEDENT token is generated for +each number remaining on the stack that is larger than zero. + +Here is an example of a correctly (though confusingly) indented piece +of Python code: + +\begin{verbatim} +def perm(l): + # Compute the list of all permutations of l + + if len(l) <= 1: + return [l] + r = [] + for i in range(len(l)): + s = l[:i] + l[i+1:] + p = perm(s) + for x in p: + r.append(l[i:i+1] + x) + return r +\end{verbatim} + +The following example shows various indentation errors: + +\begin{verbatim} + def perm(l): # error: first line indented + for i in range(len(l)): # error: not indented + s = l[:i] + l[i+1:] + p = perm(l[:i] + l[i+1:]) # error: unexpected indent + for x in p: + r.append(l[i:i+1] + x) + return r # error: inconsistent dedent +\end{verbatim} + +(Actually, the first three errors are detected by the parser; only the +last error is found by the lexical analyzer --- the indentation of +\verb@return r@ does not match a level popped off the stack.) + +\section{Other tokens} + +Besides NEWLINE, INDENT and DEDENT, the following categories of tokens +exist: identifiers, keywords, literals, operators, and delimiters. +Spaces and tabs are not tokens, but serve to delimit tokens. Where +ambiguity exists, a token comprises the longest possible string that +forms a legal token, when read from left to right. 
+ +\section{Identifiers} + +Identifiers (also referred to as names) are described by the following +lexical definitions: +\index{identifier} +\index{name} + +\begin{verbatim} +identifier: (letter|"_") (letter|digit|"_")* +letter: lowercase | uppercase +lowercase: "a"..."z" +uppercase: "A"..."Z" +digit: "0"..."9" +\end{verbatim} + +Identifiers are unlimited in length. Case is significant. + +\subsection{Keywords} + +The following identifiers are used as reserved words, or {\em +keywords} of the language, and cannot be used as ordinary +identifiers. They must be spelled exactly as written here: +\index{keyword} +\index{reserved word} + +\begin{verbatim} +and elif global not try +break else if or while +class except import pass +continue finally in print +def for is raise +del from lambda return +\end{verbatim} + +% When adding keywords, pipe it through keywords.py for reformatting + +\section{Literals} \label{literals} + +Literals are notations for constant values of some built-in types. +\index{literal} +\index{constant} + +\subsection{String literals} + +String literals are described by the following lexical definitions: +\index{string literal} + +\begin{verbatim} +stringliteral: shortstring | longstring +shortstring: "'" shortstringitem* "'" | '"' shortstringitem* '"' +longstring: "'''" longstringitem* "'''" | '"""' longstringitem* '"""' +shortstringitem: shortstringchar | escapeseq +longstringitem: longstringchar | escapeseq +shortstringchar: <any ASCII character except "\" or newline or the quote> +longstringchar: <any ASCII character except "\"> +escapeseq: "\" <any ASCII character> +\end{verbatim} +\index{ASCII} + +In ``long strings'' (strings surrounded by sets of three quotes), +unescaped newlines and quotes are allowed (and are retained), except +that three unescaped quotes in a row terminate the string. (A +``quote'' is the character used to open the string, i.e. either +\verb/'/ or \verb/"/.) + +Escape sequences in strings are interpreted according to rules similar +to those used by Standard C. 
The recognized escape sequences are: +\index{physical line} +\index{escape sequence} +\index{Standard C} +\index{C} + +\begin{center} +\begin{tabular}{|l|l|} +\hline +\verb/\/{\em newline} & Ignored \\ +\verb/\\/ & Backslash (\verb/\/) \\ +\verb/\'/ & Single quote (\verb/'/) \\ +\verb/\"/ & Double quote (\verb/"/) \\ +\verb/\a/ & \ASCII{} Bell (BEL) \\ +\verb/\b/ & \ASCII{} Backspace (BS) \\ +%\verb/\E/ & \ASCII{} Escape (ESC) \\ +\verb/\f/ & \ASCII{} Formfeed (FF) \\ +\verb/\n/ & \ASCII{} Linefeed (LF) \\ +\verb/\r/ & \ASCII{} Carriage Return (CR) \\ +\verb/\t/ & \ASCII{} Horizontal Tab (TAB) \\ +\verb/\v/ & \ASCII{} Vertical Tab (VT) \\ +\verb/\/{\em ooo} & \ASCII{} character with octal value {\em ooo} \\ +\verb/\x/{\em xx...} & \ASCII{} character with hex value {\em xx...} \\ +\hline +\end{tabular} +\end{center} +\index{ASCII} + +In strict compatibility with Standard C, up to three octal digits are +accepted, but an unlimited number of hex digits is taken to be part of +the hex escape (and then the lower 8 bits of the resulting hex number +are used in all current implementations...). + +All unrecognized escape sequences are left in the string unchanged, +i.e., {\em the backslash is left in the string.} (This behavior is +useful when debugging: if an escape sequence is mistyped, the +resulting output is more easily recognized as broken. It also helps a +great deal for string literals used as regular expressions or +otherwise passed to other modules that do their own escape handling.) +\index{unrecognized escape sequence} + +\subsection{Numeric literals} + +There are three types of numeric literals: plain integers, long +integers, and floating point numbers. 
+\index{number} +\index{numeric literal} +\index{integer literal} +\index{plain integer literal} +\index{long integer literal} +\index{floating point literal} +\index{hexadecimal literal} +\index{octal literal} +\index{decimal literal} + +Integer and long integer literals are described by the following +lexical definitions: + +\begin{verbatim} +longinteger: integer ("l"|"L") +integer: decimalinteger | octinteger | hexinteger +decimalinteger: nonzerodigit digit* | "0" +octinteger: "0" octdigit+ +hexinteger: "0" ("x"|"X") hexdigit+ + +nonzerodigit: "1"..."9" +octdigit: "0"..."7" +hexdigit: digit|"a"..."f"|"A"..."F" +\end{verbatim} + +Although both lower case `l' and upper case `L' are allowed as suffix +for long integers, it is strongly recommended to always use `L', since +the letter `l' looks too much like the digit `1'. + +Plain integer decimal literals must be at most 2147483647 (i.e., the +largest positive integer, using 32-bit arithmetic). Plain octal and +hexadecimal literals may be as large as 4294967295, but values larger +than 2147483647 are converted to a negative value by subtracting +4294967296. There is no limit for long integer literals apart from +what can be stored in available memory. + +Some examples of plain and long integer literals: + +\begin{verbatim} +7 2147483647 0177 0x80000000 +3L 79228162514264337593543950336L 0377L 0x100000000L +\end{verbatim} + +Floating point literals are described by the following lexical +definitions: + +\begin{verbatim} +floatnumber: pointfloat | exponentfloat +pointfloat: [intpart] fraction | intpart "." +exponentfloat: (intpart | pointfloat) exponent +intpart: digit+ +fraction: "." digit+ +exponent: ("e"|"E") ["+"|"-"] digit+ +\end{verbatim} + +The allowed range of floating point literals is +implementation-dependent. + +Some examples of floating point literals: + +\begin{verbatim} +3.14 10. 
.001 1e100 3.14e-10 +\end{verbatim} + +Note that numeric literals do not include a sign; a phrase like +\verb@-1@ is actually an expression composed of the operator +\verb@-@ and the literal \verb@1@. + +\section{Operators} + +The following tokens are operators: +\index{operators} + +\begin{verbatim} ++ - * / % +<< >> & | ^ ~ +< == > <= <> != >= +\end{verbatim} + +The comparison operators \verb@<>@ and \verb@!=@ are alternate +spellings of the same operator. + +\section{Delimiters} + +The following tokens serve as delimiters or otherwise have a special +meaning: +\index{delimiters} + +\begin{verbatim} +( ) [ ] { } +, : . " ` ' += ; +\end{verbatim} + +The following printing \ASCII{} characters are not used in Python. Their +occurrence outside string literals and comments is an unconditional +error: +\index{ASCII} + +\begin{verbatim} +@ $ ? +\end{verbatim} + +They may be used by future versions of the language though! diff --git a/Doc/ref/ref3.tex b/Doc/ref/ref3.tex new file mode 100644 index 0000000..fd152c1 --- /dev/null +++ b/Doc/ref/ref3.tex @@ -0,0 +1,889 @@ +\chapter{Data model} + +\section{Objects, values and types} + +\dfn{Objects} are Python's abstraction for data. All data in a Python +program is represented by objects or by relations between objects. +(In a sense, and in conformance to Von Neumann's model of a +``stored program computer'', code is also represented by objects.) +\index{object} +\index{data} + +Every object has an identity, a type and a value. An object's +\emph{identity} never changes once it has been created; you may think +of it as the object's address in memory. An object's \dfn{type} is +also unchangeable. It determines the operations that an object +supports (e.g.\ ``does it have a length?'') and also defines the +possible values for objects of that type. The \emph{value} of some +objects can change. 
Objects whose value can change are said to be +\emph{mutable}; objects whose value is unchangeable once they are +created are called \emph{immutable}. The type determines an object's +(im)mutability. +\index{identity of an object} +\index{value of an object} +\index{type of an object} +\index{mutable object} +\index{immutable object} + +Objects are never explicitly destroyed; however, when they become +unreachable they may be garbage-collected. An implementation is +allowed to delay garbage collection or omit it altogether --- it is a +matter of implementation quality how garbage collection is +implemented, as long as no objects are collected that are still +reachable. (Implementation note: the current implementation uses a +reference-counting scheme which collects most objects as soon as they +become unreachable, but never collects garbage containing circular +references.) +\index{garbage collection} +\index{reference counting} +\index{unreachable object} + +Note that the use of the implementation's tracing or debugging +facilities may keep objects alive that would normally be collectable. + +Some objects contain references to ``external'' resources such as open +files or windows. It is understood that these resources are freed +when the object is garbage-collected, but since garbage collection is +not guaranteed to happen, such objects also provide an explicit way to +release the external resource, usually a \method{close()} method. +Programs are strongly recommended to always explicitly close such +objects. + +Some objects contain references to other objects; these are called +\emph{containers}. Examples of containers are tuples, lists and +dictionaries. The references are part of a container's value. In +most cases, when we talk about the value of a container, we imply the +values, not the identities of the contained objects; however, when we +talk about the (im)mutability of a container, only the identities of +the immediately contained objects are implied. 
(So, if an immutable +container contains a reference to a mutable object, its value changes +if that mutable object is changed.) +\index{container} + +Types affect almost all aspects of objects' lives. Even the meaning +of object identity is affected in some sense: for immutable types, +operations that compute new values may actually return a reference to +any existing object with the same type and value, while for mutable +objects this is not allowed. E.g. after + +\begin{verbatim} +a = 1; b = 1; c = []; d = [] +\end{verbatim} + +\code{a} and \code{b} may or may not refer to the same object with the +value one, depending on the implementation, but \code{c} and \code{d} +are guaranteed to refer to two different, unique, newly created empty +lists. + +\section{The standard type hierarchy} \label{types} + +Below is a list of the types that are built into Python. Extension +modules written in C can define additional types. Future versions of +Python may add types to the type hierarchy (e.g.\ rational or complex +numbers, efficiently stored arrays of integers, etc.). +\index{type} +\indexii{data}{type} +\indexii{type}{hierarchy} +\indexii{extension}{module} +\indexii{C}{language} + +Some of the type descriptions below contain a paragraph listing +`special attributes'. These are attributes that provide access to the +implementation and are not intended for general use. Their definition +may change in the future. There are also some `generic' special +attributes, not listed with the individual objects: \member{__methods__} +is a list of the method names of a built-in object, if it has any; +\member{__members__} is a list of the data attribute names of a built-in +object, if it has any. +\index{attribute} +\indexii{special}{attribute} +\indexiii{generic}{special}{attribute} +\ttindex{__methods__} +\ttindex{__members__} + +\begin{description} + +\item[None] +This type has a single value. There is a single object with this value. 
+This object is accessed through the built-in name \code{None}. +It is returned from functions that don't explicitly return an object. +\ttindex{None} +\obindex{None@{\tt None}} + +\item[Numbers] +These are created by numeric literals and returned as results by +arithmetic operators and arithmetic built-in functions. Numeric +objects are immutable; once created their value never changes. Python +numbers are of course strongly related to mathematical numbers, but +subject to the limitations of numerical representation in computers. +\obindex{number} +\obindex{numeric} + +Python distinguishes between integers and floating point numbers: + +\begin{description} +\item[Integers] +These represent elements from the mathematical set of whole numbers. +\obindex{integer} + +There are two types of integers: + +\begin{description} + +\item[Plain integers] +These represent numbers in the range -2147483648 through 2147483647. +(The range may be larger on machines with a larger natural word +size, but not smaller.) +When the result of an operation falls outside this range, the +exception \exception{OverflowError} is raised. +For the purpose of shift and mask operations, integers are assumed to +have a binary, 2's complement notation using 32 or more bits, and +hiding no bits from the user (i.e., all 4294967296 different bit +patterns correspond to different values). +\obindex{plain integer} +\withsubitem{(built-in exception)}{\ttindex{OverflowError}} + +\item[Long integers] +These represent numbers in an unlimited range, subject to available +(virtual) memory only. For the purpose of shift and mask operations, +a binary representation is assumed, and negative numbers are +represented in a variant of 2's complement which gives the illusion of +an infinite string of sign bits extending to the left. 
+\obindex{long integer} + +\end{description} % Integers + +The rules for integer representation are intended to give the most +meaningful interpretation of shift and mask operations involving +negative integers and the least surprises when switching between the +plain and long integer domains. For any operation except left shift, +if it yields a result in the plain integer domain without causing +overflow, it will yield the same result in the long integer domain or +when using mixed operands. +\indexii{integer}{representation} + +\item[Floating point numbers] +These represent machine-level double precision floating point numbers. +You are at the mercy of the underlying machine architecture and +C implementation for the accepted range and handling of overflow. +\obindex{floating point} +\indexii{floating point}{number} +\indexii{C}{language} + +\end{description} % Numbers + +\item[Sequences] +These represent finite ordered sets indexed by natural numbers. +The built-in function \function{len()}\bifuncindex{len} returns the +number of elements of a sequence. When this number is \var{n}, the +index set contains the numbers 0, 1, \ldots, \var{n}-1. Element +\var{i} of sequence \var{a} is selected by \code{\var{a}[\var{i}]}. +\obindex{sequence} +\index{index operation} +\index{item selection} +\index{subscription} + +Sequences also support slicing: \code{\var{a}[\var{i}:\var{j}]} +selects all elements with index \var{k} such that \var{i} \code{<=} +\var{k} \code{<} \var{j}. When used as an expression, a slice is a +sequence of the same type --- this implies that the index set is +renumbered so that it starts at 0 again. +\index{slicing} + +Sequences are distinguished according to their mutability: + +\begin{description} +% +\item[Immutable sequences] +An object of an immutable sequence type cannot change once it is +created. 
(If the object contains references to other objects, +these other objects may be mutable and may be changed; however +the collection of objects directly referenced by an immutable object +cannot change.) +\obindex{immutable sequence} +\obindex{immutable} + +The following types are immutable sequences: + +\begin{description} + +\item[Strings] +The elements of a string are characters. There is no separate +character type; a character is represented by a string of one element. +Characters represent (at least) 8-bit bytes. The built-in +functions \function{chr()}\bifuncindex{chr} and +\function{ord()}\bifuncindex{ord} convert between characters and +nonnegative integers representing the byte values. Bytes with the +values 0-127 represent the corresponding \ASCII{} values. The string +data type is also used to represent arrays of bytes, e.g.\ to hold data +read from a file. +\obindex{string} +\index{character} +\index{byte} +\index{ASCII} + +(On systems whose native character set is not \ASCII{}, strings may use +EBCDIC in their internal representation, provided the functions +\function{chr()} and \function{ord()} implement a mapping between \ASCII{} and +EBCDIC, and string comparison preserves the \ASCII{} order. +Or perhaps someone can propose a better rule?) +\index{ASCII} +\index{EBCDIC} +\index{character set} +\indexii{string}{comparison} +\bifuncindex{chr} +\bifuncindex{ord} + +\item[Tuples] +The elements of a tuple are arbitrary Python objects. +Tuples of two or more elements are formed by comma-separated lists +of expressions. A tuple of one element (a `singleton') can be formed +by affixing a comma to an expression (an expression by itself does +not create a tuple, since parentheses must be usable for grouping of +expressions). An empty tuple can be formed by enclosing `nothing' in +parentheses. 
+\obindex{tuple} +\indexii{singleton}{tuple} +\indexii{empty}{tuple} + +\end{description} % Immutable sequences + +\item[Mutable sequences] +Mutable sequences can be changed after they are created. The +subscription and slicing notations can be used as the target of +assignment and \keyword{del} (delete) statements. +\obindex{mutable sequence} +\obindex{mutable} +\indexii{assignment}{statement} +\index{delete} +\stindex{del} +\index{subscription} +\index{slicing} + +There is currently a single mutable sequence type: + +\begin{description} + +\item[Lists] +The elements of a list are arbitrary Python objects. Lists are formed +by placing a comma-separated list of expressions in square brackets. +(Note that there are no special cases needed to form lists of length 0 +or 1.) +\obindex{list} + +\end{description} % Mutable sequences + +\end{description} % Sequences + +\item[Mapping types] +These represent finite sets of objects indexed by arbitrary index sets. +The subscript notation \code{a[k]} selects the element indexed +by \code{k} from the mapping \code{a}; this can be used in +expressions and as the target of assignments or \keyword{del} statements. +The built-in function \function{len()} returns the number of elements +in a mapping. +\bifuncindex{len} +\index{subscription} +\obindex{mapping} + +There is currently a single mapping type: + +\begin{description} + +\item[Dictionaries] +These represent finite sets of objects indexed by almost arbitrary +values. The only types of values not acceptable as keys are values +containing lists or dictionaries or other mutable types that are +compared by value rather than by object identity --- the reason being +that the implementation requires that a key's hash value be constant. +Numeric types used for keys obey the normal rules for numeric +comparison: if two numbers compare equal (e.g.\ \code{1} and +\code{1.0}) then they can be used interchangeably to index the same +dictionary entry. 
+ +Dictionaries are mutable; they are created by the \code{\{...\}} +notation (see section \ref{dict}). +\obindex{dictionary} +\obindex{mutable} + +\end{description} % Mapping types + +\item[Callable types] +These are the types to which the function call (invocation) operation, +written as \code{function(argument, argument, ...)}, can be applied: +\indexii{function}{call} +\index{invocation} +\indexii{function}{argument} +\obindex{callable} + +\begin{description} + +\item[User-defined functions] +A user-defined function object is created by a function definition +(see section \ref{function}). It should be called with an argument +list containing the same number of items as the function's formal +parameter list. +\indexii{user-defined}{function} +\obindex{function} +\obindex{user-defined function} + +Special read-only attributes: \member{func_code} is the code object +representing the compiled function body, and \member{func_globals} is (a +reference to) the dictionary that holds the function's global +variables --- it implements the global name space of the module in +which the function was defined. +\ttindex{func_code} +\ttindex{func_globals} +\indexii{global}{name space} + +\item[User-defined methods] +A user-defined method (a.k.a. \dfn{object closure}) is a pair of a +class instance object and a user-defined function. It should be +called with an argument list containing one item less than the number +of items in the function's formal parameter list. When called, the +class instance becomes the first argument, and the call arguments are +shifted one to the right. +\obindex{method} +\obindex{user-defined method} +\indexii{user-defined}{method} +\index{object closure} + +Special read-only attributes: \member{im_self} is the class instance +object, \member{im_func} is the function object. +\ttindex{im_func} +\ttindex{im_self} + +\item[Built-in functions] +A built-in function object is a wrapper around a \C{} function. 
Examples +of built-in functions are \function{len()} and \function{math.sin()}. There +are no special attributes. The number and type of the arguments are +determined by the \C{} function. +\obindex{built-in function} +\obindex{function} +\indexii{C}{language} + +\item[Built-in methods] +This is really a different disguise of a built-in function, this time +containing an object passed to the \C{} function as an implicit extra +argument. An example of a built-in method is \code{\var{list}.append()} if +\var{list} is a list object. +\obindex{built-in method} +\obindex{method} +\indexii{built-in}{method} + +\item[Classes] +Class objects are described below. When a class object is called as a +function, a new class instance (also described below) is created and +returned. This implies a call to the class's \method{__init__()} method +if it has one. Any arguments are passed on to the \method{__init__()} +method --- if there is no \method{__init__()} method, the class must be called +without arguments. +\ttindex{__init__} +\obindex{class} +\obindex{class instance} +\obindex{instance} +\indexii{class object}{call} + +\end{description} + +\item[Modules] +Modules are imported by the \keyword{import} statement (see section +\ref{import}). A module object is a container for a module's name +space, which is a dictionary (the same dictionary as referenced by the +\member{func_globals} attribute of functions defined in the module). +Module attribute references are translated to lookups in this +dictionary. A module object does not contain the code object used to +initialize the module (since it isn't needed once the initialization +is done). +\stindex{import} +\obindex{module} + +Attribute assignments update the module's name space dictionary. + +Special read-only attribute: \member{__dict__} yields the module's name +space as a dictionary object. 
Predefined attributes: \member{__name__} +yields the module's name as a string object; \member{__doc__} yields the +module's documentation string as a string object, or +\code{None} if no documentation string was found. +\ttindex{__dict__} +\ttindex{__name__} +\ttindex{__doc__} +\indexii{module}{name space} + +\item[Classes] +Class objects are created by class definitions (see section +\ref{class}). A class is a container for a dictionary containing the +class's name space. Class attribute references are translated to +lookups in this dictionary. When an attribute name is not found +there, the attribute search continues in the base classes. The search +is depth-first, left-to-right in the order of their occurrence in the +base class list. +\obindex{class} +\obindex{class instance} +\obindex{instance} +\indexii{class object}{call} +\index{container} +\obindex{dictionary} +\indexii{class}{attribute} + +Class attribute assignments update the class's dictionary, never the +dictionary of a base class. +\indexiii{class}{attribute}{assignment} + +A class can be called as a function to yield a class instance (see +above). +\indexii{class object}{call} + +Special read-only attributes: \member{__dict__} yields the dictionary +containing the class's name space; \member{__bases__} yields a tuple +(possibly empty or a singleton) containing the base classes, in the +order of their occurrence in the base class list. +\ttindex{__dict__} +\ttindex{__bases__} + +\item[Class instances] +A class instance is created by calling a class object as a +function. A class instance has a dictionary in which +attribute references are searched. When an attribute is not found +there, and the instance's class has an attribute by that name, and +that class attribute is a user-defined function (and in no other +cases), the instance attribute reference yields a user-defined method +object (see above) constructed from the instance and the function. 
+\obindex{class instance} +\obindex{instance} +\indexii{class}{instance} +\indexii{class instance}{attribute} + +Attribute assignments update the instance's dictionary. +\indexiii{class instance}{attribute}{assignment} + +Class instances can pretend to be numbers, sequences, or mappings if +they have methods with certain special names. These are described in +section \ref{specialnames}. +\obindex{number} +\obindex{sequence} +\obindex{mapping} + +Special read-only attributes: \member{__dict__} yields the attribute +dictionary; \member{__class__} yields the instance's class. +\ttindex{__dict__} +\ttindex{__class__} + +\item[Files] +A file object represents an open file. (It is a wrapper around a \C{} +\code{stdio} file pointer.) File objects are created by the +\function{open()} built-in function, and also by \function{posix.popen()} and +the \method{makefile()} method of socket objects. \code{sys.stdin}, +\code{sys.stdout} and \code{sys.stderr} are file objects corresponding +to the interpreter's standard input, output and error streams. +See the \emph{Python Library Reference} for methods of file objects +and other details. +\obindex{file} +\indexii{C}{language} +\index{stdio} +\bifuncindex{open} +\bifuncindex{popen} +\bifuncindex{makefile} +\ttindex{stdin} +\ttindex{stdout} +\ttindex{stderr} +\ttindex{sys.stdin} +\ttindex{sys.stdout} +\ttindex{sys.stderr} + +\item[Internal types] +A few types used internally by the interpreter are exposed to the user. +Their definition may change with future versions of the interpreter, +but they are mentioned here for completeness. +\index{internal type} +\index{types, internal} + +\begin{description} + +\item[Code objects] +Code objects represent ``pseudo-compiled'' executable Python code. +The difference between a code +object and a function object is that the function object contains an +explicit reference to the function's context (the module in which it +was defined) while a code object contains no context. 
+\obindex{code} + +Special read-only attributes: \member{co_code} is a string representing +the sequence of instructions; \member{co_consts} is a list of literals +used by the code; \member{co_names} is a list of names (strings) used by +the code; \member{co_filename} is the filename from which the code was +compiled. (To find out the line numbers, you would have to decode the +instructions; the standard library module +\module{dis}\refstmodindex{dis} contains an example of how to do +this.) +\ttindex{co_code} +\ttindex{co_consts} +\ttindex{co_names} +\ttindex{co_filename} + +\item[Frame objects] +Frame objects represent execution frames. They may occur in traceback +objects (see below). +\obindex{frame} + +Special read-only attributes: \member{f_back} is to the previous +stack frame (towards the caller), or \code{None} if this is the bottom +stack frame; \member{f_code} is the code object being executed in this +frame; \member{f_globals} is the dictionary used to look up global +variables; \member{f_locals} is used for local variables; +\member{f_lineno} gives the line number and \member{f_lasti} gives the +precise instruction (this is an index into the instruction string of +the code object). +\ttindex{f_back} +\ttindex{f_code} +\ttindex{f_globals} +\ttindex{f_locals} +\ttindex{f_lineno} +\ttindex{f_lasti} + +\item[Traceback objects] \label{traceback} +Traceback objects represent a stack trace of an exception. A +traceback object is created when an exception occurs. When the search +for an exception handler unwinds the execution stack, at each unwound +level a traceback object is inserted in front of the current +traceback. When an exception handler is entered +(see also section \ref{try}), the stack trace is +made available to the program as \code{sys.exc_traceback}. 
When the +program contains no suitable handler, the stack trace is written +(nicely formatted) to the standard error stream; if the interpreter is +interactive, it is also made available to the user as +\code{sys.last_traceback}. +\obindex{traceback} +\indexii{stack}{trace} +\indexii{exception}{handler} +\indexii{execution}{stack} +\ttindex{exc_traceback} +\ttindex{last_traceback} +\ttindex{sys.exc_traceback} +\ttindex{sys.last_traceback} + +Special read-only attributes: \member{tb_next} is the next level in the +stack trace (towards the frame where the exception occurred), or +\code{None} if there is no next level; \member{tb_frame} points to the +execution frame of the current level; \member{tb_lineno} gives the line +number where the exception occurred; \member{tb_lasti} indicates the +precise instruction. The line number and last instruction in the +traceback may differ from the line number of its frame object if the +exception occurred in a \keyword{try} statement with no matching +except clause or with a finally clause. +\ttindex{tb_next} +\ttindex{tb_frame} +\ttindex{tb_lineno} +\ttindex{tb_lasti} +\stindex{try} + +\end{description} % Internal types + +\end{description} % Types + + +\section{Special method names} \label{specialnames} + +A class can implement certain operations that are invoked by special +syntax (such as subscription or arithmetic operations) by defining +methods with special names. For instance, if a class defines a +method named \method{__getitem__()}, and \code{x} is an instance of this +class, then \code{x[i]} is equivalent to \code{x.__getitem__(i)}. +(The reverse is not true --- if \code{x} is a list object, +\code{x.__getitem__(i)} is not equivalent to \code{x[i]}.) +\ttindex{__getitem__} + +Except for \method{__repr__()}, \method{__str__()} and \method{__cmp__()}, +attempts to execute an +operation raise an exception when no appropriate method is defined. 
+For \method{__repr__()}, the default is to return a string describing the +object's class and address. +For \method{__cmp__()}, the default is to compare instances based on their +address. +For \method{__str__()}, the default is to use \method{__repr__()}. +\ttindex{__repr__} +\ttindex{__str__} +\ttindex{__cmp__} + + +\subsection{Special methods for any type} + +\begin{description} + +\item[{\tt __init__(self, args...)}] +Called when the instance is created. The arguments are those passed +to the class constructor expression. If a base class has an +\code{__init__} method the derived class's \code{__init__} method must +explicitly call it to ensure proper initialization of the base class +part of the instance. +\ttindex{__init__} +\indexii{class}{constructor} + + +\item[{\tt __del__(self)}] +Called when the instance is about to be destroyed. If a base class +has a \method{__del__()} method the derived class's \method{__del__()} method +must explicitly call it to ensure proper deletion of the base class +part of the instance. Note that it is possible for the \method{__del__()} +method to postpone destruction of the instance by creating a new +reference to it. It may then be called at a later time when this new +reference is deleted. It is not guaranteed that +\method{__del__()} methods are called for objects that still exist when +the interpreter exits. +If an exception occurs in a \method{__del__()} method, it is ignored, and +a warning is printed on stderr. +\ttindex{__del__} +\stindex{del} + +Note that \code{del x} doesn't directly call \code{x.__del__()} --- the +former decrements the reference count for \code{x} by one, but +\code{x.__del__()} is only called when its reference count reaches zero. + +\strong{Warning:} due to the precarious circumstances under which +\code{__del__()} methods are executed, exceptions that occur during +their execution are \emph{ignored}. 
+ +\item[{\tt __repr__(self)}] +Called by the \function{repr()} built-in function and by string conversions +(reverse or backward quotes) to compute the string representation of an object. +\ttindex{__repr__} +\bifuncindex{repr} +\indexii{string}{conversion} +\indexii{reverse}{quotes} +\indexii{backward}{quotes} +\index{back-quotes} + +\item[{\tt __str__(self)}] +Called by the \function{str()} built-in function and by the \keyword{print} +statement to compute the string representation of an object. +\ttindex{__str__} +\bifuncindex{str} +\stindex{print} + +\item[{\tt __cmp__(self, other)}] +Called by all comparison operations. Should return \code{-1} if +\code{self < other}, \code{0} if \code{self == other}, \code{+1} if +\code{self > other}. If no \method{__cmp__()} operation is defined, class +instances are compared by object identity (``address''). +(Implementation note: due to limitations in the interpreter, +exceptions raised by comparisons are ignored, and the objects will be +considered equal in this case.) +\ttindex{__cmp__} +\bifuncindex{cmp} +\index{comparisons} + +\item[{\tt __hash__(self)}] +Called for the key object for dictionary operations, +and by the built-in function +\function{hash()}\bifuncindex{hash}. Should return a 32-bit integer +usable as a hash value +for dictionary operations. The only required property is that objects +which compare equal have the same hash value; it is advised to somehow +mix together (e.g.\ using exclusive or) the hash values for the +components of the object that also play a part in comparison of +objects. If a class does not define a \method{__cmp__()} method it should +not define a \method{__hash__()} operation either; if it defines +\method{__cmp__()} but not \method{__hash__()} its instances will not be +usable as dictionary keys. 
If a class defines mutable objects and +implements a \method{__cmp__()} method it should not implement +\method{__hash__()}, since the dictionary implementation assumes that a +key's hash value is a constant. +\obindex{dictionary} +\ttindex{__cmp__} +\ttindex{__hash__} + +\item[{\tt __call__(self, *args)}] +Called when the instance is ``called'' as a function. +\ttindex{__call__} +\indexii{call}{instance} + +\end{description} + + +\subsection{Special methods for attribute access} + +The following methods can be used to change the meaning of attribute +access for class instances. + +\begin{description} + +\item[{\tt __getattr__(self, name)}] +Called when an attribute lookup has not found the attribute in the +usual places (i.e. it is not an instance attribute nor is it found in +the class tree for \code{self}). \code{name} is the attribute name. +\ttindex{__getattr__} + +Note that if the attribute is found through the normal mechanism, +\code{__getattr__} is not called. (This is an asymmetry between +\code{__getattr__} and \code{__setattr__}.) +This is done both for efficiency reasons and because otherwise +\code{__getattr__} would have no way to access other attributes of the +instance. +Note that at least for instance variables, \code{__getattr__} can fake +total control by simply not inserting any values in the instance +attribute dictionary. +\ttindex{__setattr__} + +\item[{\tt __setattr__(self, name, value)}] +Called when an attribute assignment is attempted. This is called +instead of the normal mechanism (i.e. store the value as an instance +attribute). \code{name} is the attribute name, \code{value} is the +value to be assigned to it. +\ttindex{__setattr__} + +If \code{__setattr__} wants to assign to an instance attribute, it +should not simply execute \code{self.\var{name} = value} --- this would +cause a recursive call. Instead, it should insert the value in the +dictionary of instance attributes, e.g.\ \code{self.__dict__[name] = +value}. 
+\ttindex{__dict__} + +\item[{\tt __delattr__(self, name)}] +Like \code{__setattr__} but for attribute deletion instead of +assignment. +\ttindex{__delattr__} + +\end{description} + + +\subsection{Special methods for sequence and mapping types} + +\begin{description} + +\item[{\tt __len__(self)}] +Called to implement the built-in function \function{len()}. Should return +the length of the object, an integer \code{>=} 0. Also, an object +whose \method{__len__()} method returns 0 is considered to be false in a +Boolean context. +\ttindex{__len__} + +\item[{\tt __getitem__(self, key)}] +Called to implement evaluation of \code{self[key]}. Note that the +special interpretation of negative keys (if the class wishes to +emulate a sequence type) is up to the \method{__getitem__()} method. +\ttindex{__getitem__} + +\item[{\tt __setitem__(self, key, value)}] +Called to implement assignment to \code{self[key]}. Same note as for +\method{__getitem__()}. +\ttindex{__setitem__} + +\item[{\tt __delitem__(self, key)}] +Called to implement deletion of \code{self[key]}. Same note as for +\method{__getitem__()}. +\ttindex{__delitem__} + +\end{description} + + +\subsection{Special methods for sequence types} + +\begin{description} + +\item[{\tt __getslice__(self, i, j)}] +Called to implement evaluation of \code{self[i:j]}. Note that missing +\code{i} or \code{j} are replaced by 0 or \code{len(self)}, +respectively, and \code{len(self)} has been added (once) to originally +negative \code{i} or \code{j} by the time this function is called +(unlike for \method{__getitem__()}). +\ttindex{__getslice__} + +\item[{\tt __setslice__(self, i, j, sequence)}] +Called to implement assignment to \code{self[i:j]}. Same notes as for +\method{__getslice__()}. +\ttindex{__setslice__} + +\item[{\tt __delslice__(self, i, j)}] +Called to implement deletion of \code{self[i:j]}. Same notes as for +\method{__getslice__()}. 
+\ttindex{__delslice__} + +\end{description} + + +\subsection{Special methods for numeric types} + +\begin{description} + +\item[{\tt __add__(self, other)}]\itemjoin +\item[{\tt __sub__(self, other)}]\itemjoin +\item[{\tt __mul__(self, other)}]\itemjoin +\item[{\tt __div__(self, other)}]\itemjoin +\item[{\tt __mod__(self, other)}]\itemjoin +\item[{\tt __divmod__(self, other)}]\itemjoin +\item[{\tt __pow__(self, other)}]\itemjoin +\item[{\tt __lshift__(self, other)}]\itemjoin +\item[{\tt __rshift__(self, other)}]\itemjoin +\item[{\tt __and__(self, other)}]\itemjoin +\item[{\tt __xor__(self, other)}]\itemjoin +\item[{\tt __or__(self, other)}]\itembreak +Called to implement the binary arithmetic operations (\code{+}, +\code{-}, \code{*}, \code{/}, \code{\%}, \function{divmod()}, \function{pow()}, +\code{<<}, \code{>>}, \code{\&}, \code{\^}, \code{|}). +\ttindex{__or__} +\ttindex{__xor__} +\ttindex{__and__} +\ttindex{__rshift__} +\ttindex{__lshift__} +\ttindex{__pow__} +\ttindex{__divmod__} +\ttindex{__mod__} +\ttindex{__div__} +\ttindex{__mul__} +\ttindex{__sub__} +\ttindex{__add__} + +\item[{\tt __neg__(self)}]\itemjoin +\item[{\tt __pos__(self)}]\itemjoin +\item[{\tt __abs__(self)}]\itemjoin +\item[{\tt __invert__(self)}]\itembreak +Called to implement the unary arithmetic operations (\code{-}, \code{+}, +\function{abs()} and \code{~}). +\ttindex{__invert__} +\ttindex{__abs__} +\ttindex{__pos__} +\ttindex{__neg__} + +\item[{\tt __nonzero__(self)}] +Called to implement boolean testing; should return 0 or 1. An +alternative name for this method is \method{__len__()}. +\ttindex{__nonzero__} + +\item[{\tt __coerce__(self, other)}] +Called to implement ``mixed-mode'' numeric arithmetic. Should either +return a tuple containing self and other converted to a common numeric +type, or None if no way of conversion is known. 
When the common type +would be the type of other, it is sufficient to return None, since the +interpreter will also ask the other object to attempt a coercion (but +sometimes, if the implementation of the other type cannot be changed, +it is useful to do the conversion to the other type here). +\ttindex{__coerce__} + +Note that this method is not called to coerce the arguments to \code{+} +and \code{*}, because these are also used to implement sequence +concatenation and repetition, respectively. Also note that, for the +same reason, in \code{\var{n} * \var{x}}, where \var{n} is a built-in +number and \var{x} is an instance, a call to +\code{\var{x}.__mul__(\var{n})} is made.% +\footnote{The interpreter should really distinguish between +user-defined classes implementing sequences, mappings or numbers, but +currently it doesn't --- hence this strange exception.} +\ttindex{__mul__} + +\item[{\tt __int__(self)}]\itemjoin +\item[{\tt __long__(self)}]\itemjoin +\item[{\tt __float__(self)}]\itembreak +Called to implement the built-in functions \function{int()}, \function{long()} +and \function{float()}. Should return a value of the appropriate type. +\ttindex{__float__} +\ttindex{__long__} +\ttindex{__int__} + +\item[{\tt __oct__(self)}]\itemjoin +\item[{\tt __hex__(self)}]\itembreak +Called to implement the built-in functions \function{oct()} and +\function{hex()}. Should return a string value. +\ttindex{__hex__} +\ttindex{__oct__} + +\end{description} diff --git a/Doc/ref/ref4.tex b/Doc/ref/ref4.tex new file mode 100644 index 0000000..9ab448b --- /dev/null +++ b/Doc/ref/ref4.tex @@ -0,0 +1,200 @@ +\chapter{Execution model} +\index{execution model} + +\section{Code blocks, execution frames, and name spaces} \label{execframes} +\index{code block} +\indexii{execution}{frame} +\index{name space} + +A {\em code block} is a piece of Python program text that can be +executed as a unit, such as a module, a class definition or a function +body. 
Some code blocks (like modules) are executed only once, others +(like function bodies) may be executed many times. Code blocks may +textually contain other code blocks. Code blocks may invoke other +code blocks (that may or may not be textually contained in them) as +part of their execution, e.g. by invoking (calling) a function. +\index{code block} +\indexii{code}{block} + +The following are code blocks: A module is a code block. A function +body is a code block. A class definition is a code block. Each +command typed interactively is a separate code block; a script file is +a code block. The string argument passed to the built-in function +\function{eval()} and to the \keyword{exec} statement are code blocks. +And finally, the expression read and evaluated by the built-in +function \function{input()} is a code block. + +A code block is executed in an execution frame. An {\em execution +frame} contains some administrative information (used for debugging), +determines where and how execution continues after the code block's +execution has completed, and (perhaps most importantly) defines two +name spaces, the local and the global name space, that affect +execution of the code block. +\indexii{execution}{frame} + +A {\em name space} is a mapping from names (identifiers) to objects. +A particular name space may be referenced by more than one execution +frame, and from other places as well. Adding a name to a name space +is called {\em binding} a name (to an object); changing the mapping of +a name is called {\em rebinding}; removing a name is {\em unbinding}. +Name spaces are functionally equivalent to dictionaries. +\index{name space} +\indexii{binding}{name} +\indexii{rebinding}{name} +\indexii{unbinding}{name} + +The {\em local name space} of an execution frame determines the default +place where names are defined and searched. 
The {\em global name +space} determines the place where names listed in \keyword{global} +statements are defined and searched, and where names that are not +explicitly bound in the current code block are searched. +\indexii{local}{name space} +\indexii{global}{name space} +\stindex{global} + +Whether a name is local or global in a code block is determined by +static inspection of the source text for the code block: in the +absence of \keyword{global} statements, a name that is bound anywhere in +the code block is local in the entire code block; all other names are +considered global. The \keyword{global} statement forces global +interpretation of selected names throughout the code block. The +following constructs bind names: formal parameters, \keyword{import} +statements, class and function definitions (these bind the class or +function name), and targets that are identifiers if occurring in an +assignment, \keyword{for} loop header, or except clause header. + +A target occurring in a \keyword{del} statement is also considered bound +for this purpose (though the actual semantics are to ``unbind'' the +name). + +When a global name is not found in the global name space, it is +searched in the list of ``built-in'' names (which is actually the +global name space of the module \module{__builtin__}). When a name is not +found at all, the \exception{NameError} exception is raised.% +\footnote{If the code block contains \keyword{exec} statements or the +construct \samp{from \ldots import *}, the semantics of names not +explicitly mentioned in a {\tt global} statement change subtly: name +lookup first searches the local name space, then the global one, then +the built-in one.} +\refbimodindex{__builtin__} +\stindex{from} +\stindex{exec} +\stindex{global} +\withsubitem{(built-in exception)}{\ttindex{NameError}} + +The following table lists the meaning of the local and global name +space for various types of code blocks. 
The name space for a +particular module is automatically created when the module is first +referenced. Note that in almost all cases, the global name space is +the name space of the containing module --- scopes in Python do not +nest! + +\begin{center} +\begin{tabular}{|l|l|l|l|} +\hline +Code block type & Global name space & Local name space & Notes \\ +\hline +Module & n.s. for this module & same as global & \\ +Script & n.s. for \module{__main__} & same as global & \\ +Interactive command & n.s. for \module{__main__} & same as global & \\ +Class definition & global n.s. of containing block & new n.s. & \\ +Function body & global n.s. of containing block & new n.s. & (2) \\ +String passed to \keyword{exec} statement + & global n.s. of containing block + & local n.s. of containing block & (1) \\ +String passed to \function{eval()} + & global n.s. of caller & local n.s. of caller & (1) \\ +File read by \function{execfile()} + & global n.s. of caller & local n.s. of caller & (1) \\ +Expression read by \function{input()} + & global n.s. of caller & local n.s. of caller & \\ +\hline +\end{tabular} +\end{center} +\refbimodindex{__main__} + +Notes: + +\begin{description} + +\item[n.s.] means {\em name space} + +\item[(1)] The global and local name space for these can be +overridden with optional extra arguments. + +\item[(2)] The body of lambda forms (see section \ref{lambda}) is +treated exactly the same as a (nested) function definition. Lambda +forms have their own name space consisting of their formal arguments. +\indexii{lambda}{form} + +\end{description} + +The built-in functions \function{globals()} and \function{locals()} each return a +dictionary representing the current global and local name space, +respectively. 
The effect of modifications to this dictionary on the +name space is undefined.% +\footnote{The current implementations return the dictionary actually +used to implement the name space, {\em except} for functions, where +the optimizer may cause the local name space to be implemented +differently, and \function{locals()} returns a read-only dictionary.} + +\section{Exceptions} + +Exceptions are a means of breaking out of the normal flow of control +of a code block in order to handle errors or other exceptional +conditions. An exception is {\em raised} at the point where the error +is detected; it may be {\em handled} by the surrounding code block or +by any code block that directly or indirectly invoked the code block +where the error occurred. +\index{exception} +\index{raise an exception} +\index{handle an exception} +\index{exception handler} +\index{errors} +\index{error handling} + +The Python interpreter raises an exception when it detects a run-time +error (such as division by zero). A Python program can also +explicitly raise an exception with the \keyword{raise} statement. +Exception handlers are specified with the \keyword{try} ... \keyword{except} +statement. + +Python uses the ``termination'' model of error handling: an exception +handler can find out what happened and continue execution at an outer +level, but it cannot repair the cause of the error and retry the +failing operation (except by re-entering the offending piece of +code from the top). + +When an exception is not handled at all, the interpreter terminates +execution of the program, or returns to its interactive main loop. + +Exceptions are identified by string objects or class instances. Two +different string objects with the same value identify different +exceptions. An exception can be raised with a class instance. Such +exceptions are caught by specifying an except clause that has the +class name (or a base class) as the condition. 
+ +When an exception is raised, an object (maybe \code{None}) is passed +as the exception's ``parameter''; this object does not affect the +selection of an exception handler, but is passed to the selected +exception handler as additional information. For exceptions raised +with a class instance, the instance is passed as the ``parameter''. + +For example: + +\begin{verbatim} +>>> class Error: +... def __init__(self, msg): self.msg = msg +... +>>> class SpecificError(Error): pass +... +>>> try: +... raise SpecificError('broken') +... except Error, obj: +... print obj.msg +... +broken +\end{verbatim} + +See also the description of the \keyword{try} and \keyword{raise} +statements. diff --git a/Doc/ref/ref5.tex b/Doc/ref/ref5.tex new file mode 100644 index 0000000..b2fea3c --- /dev/null +++ b/Doc/ref/ref5.tex @@ -0,0 +1,759 @@ +\chapter{Expressions and conditions} +\index{expression} +\index{condition} + +{\bf Note:} In this and the following chapters, extended BNF notation +will be used to describe syntax, not lexical analysis. +\index{BNF} + +This chapter explains the meaning of the elements of expressions and +conditions. Conditions are a superset of expressions, and a condition +may be used wherever an expression is required by enclosing it in +parentheses. The only places where expressions are used in the syntax +instead of conditions is in expression statements and on the +right-hand side of assignment statements; this catches some nasty bugs +like accidentally writing \verb@x == 1@ instead of \verb@x = 1@. +\indexii{assignment}{statement} + +The comma plays several roles in Python's syntax. It is usually an +operator with a lower precedence than all others, but occasionally +serves other purposes as well; e.g. it separates function arguments, +is used in list and dictionary constructors, and has special semantics +in \verb@print@ statements. 
+\index{comma} + +When (one alternative of) a syntax rule has the form + +\begin{verbatim} +name: othername +\end{verbatim} + +and no semantics are given, the semantics of this form of \verb@name@ +are the same as for \verb@othername@. +\index{syntax} + +\section{Arithmetic conversions} +\indexii{arithmetic}{conversion} + +When a description of an arithmetic operator below uses the phrase +``the numeric arguments are converted to a common type'', +this both means that if either argument is not a number, a +\verb@TypeError@ exception is raised, and that otherwise +the following conversions are applied: +\exindex{TypeError} +\indexii{floating point}{number} +\indexii{long}{integer} +\indexii{plain}{integer} + +\begin{itemize} +\item first, if either argument is a floating point number, + the other is converted to floating point; +\item else, if either argument is a long integer, + the other is converted to long integer; +\item otherwise, both must be plain integers and no conversion + is necessary. +\end{itemize} + +\section{Atoms} +\index{atom} + +Atoms are the most basic elements of expressions. Forms enclosed in +reverse quotes or in parentheses, brackets or braces are also +categorized syntactically as atoms. The syntax for atoms is: + +\begin{verbatim} +atom: identifier | literal | enclosure +enclosure: parenth_form|list_display|dict_display|string_conversion +\end{verbatim} + +\subsection{Identifiers (Names)} +\index{name} +\index{identifier} + +An identifier occurring as an atom is a reference to a local, global +or built-in name binding. If a name is assigned to anywhere in a code +block (even in unreachable code), and is not mentioned in a +\verb@global@ statement in that code block, then it refers to a local +name throughout that code block. 
When it is not assigned to anywhere +in the block, or when it is assigned to but also explicitly listed in +a \verb@global@ statement, it refers to a global name if one exists, +else to a built-in name (and this binding may dynamically change). +\indexii{name}{binding} +\index{code block} +\stindex{global} +\indexii{built-in}{name} +\indexii{global}{name} + +When the name is bound to an object, evaluation of the atom yields +that object. When a name is not bound, an attempt to evaluate it +raises a \verb@NameError@ exception. +\exindex{NameError} + +\subsection{Literals} +\index{literal} + +Python knows string and numeric literals: + +\begin{verbatim} +literal: stringliteral | integer | longinteger | floatnumber +\end{verbatim} + +Evaluation of a literal yields an object of the given type (string, +integer, long integer, floating point number) with the given value. +The value may be approximated in the case of floating point literals. +See section \ref{literals} for details. + +All literals correspond to immutable data types, and hence the +object's identity is less important than its value. Multiple +evaluations of literals with the same value (either the same +occurrence in the program text or a different occurrence) may obtain +the same object or a different object with the same value. +\indexiii{immutable}{data}{type} + +(In the original implementation, all literals in the same code block +with the same type and value yield the same object.) + +\subsection{Parenthesized forms} +\index{parenthesized form} + +A parenthesized form is an optional condition list enclosed in +parentheses: + +\begin{verbatim} +parenth_form: "(" [condition_list] ")" +\end{verbatim} + +A parenthesized condition list yields whatever that condition list +yields. + +An empty pair of parentheses yields an empty tuple object. Since +tuples are immutable, the rules for literals apply here. 
+\indexii{empty}{tuple} + +(Note that tuples are not formed by the parentheses, but rather by use +of the comma operator. The exception is the empty tuple, for which +parentheses {\em are} required --- allowing unparenthesized ``nothing'' +in expressions would cause ambiguities and allow common typos to +pass uncaught.) +\index{comma} +\indexii{tuple}{display} + +\subsection{List displays} +\indexii{list}{display} + +A list display is a possibly empty series of conditions enclosed in +square brackets: + +\begin{verbatim} +list_display: "[" [condition_list] "]" +\end{verbatim} + +A list display yields a new list object. +\obindex{list} + +If it has no condition list, the list object has no items. Otherwise, +the elements of the condition list are evaluated from left to right +and inserted in the list object in that order. +\indexii{empty}{list} + +\subsection{Dictionary displays} \label{dict} +\indexii{dictionary}{display} + +A dictionary display is a possibly empty series of key/datum pairs +enclosed in curly braces: +\index{key} +\index{datum} +\index{key/datum pair} + +\begin{verbatim} +dict_display: "{" [key_datum_list] "}" +key_datum_list: key_datum ("," key_datum)* [","] +key_datum: condition ":" condition +\end{verbatim} + +A dictionary display yields a new dictionary object. +\obindex{dictionary} + +The key/datum pairs are evaluated from left to right to define the +entries of the dictionary: each key object is used as a key into the +dictionary to store the corresponding datum. + +Restrictions on the types of the key values are listed earlier in +section \ref{types}. +Clashes between duplicate keys are not detected; the last +datum (textually rightmost in the display) stored for a given key +value prevails. 
+\exindex{TypeError} + +\subsection{String conversions} +\indexii{string}{conversion} +\indexii{reverse}{quotes} +\indexii{backward}{quotes} +\index{back-quotes} + +A string conversion is a condition list enclosed in reverse (or +backward) quotes: + +\begin{verbatim} +string_conversion: "`" condition_list "`" +\end{verbatim} + +A string conversion evaluates the contained condition list and +converts the resulting object into a string according to rules +specific to its type. + +If the object is a string, a number, \verb@None@, or a tuple, list or +dictionary containing only objects whose type is one of these, the +resulting string is a valid Python expression which can be passed to +the built-in function \verb@eval()@ to yield an expression with the +same value (or an approximation, if floating point numbers are +involved). + +(In particular, converting a string adds quotes around it and converts +``funny'' characters to escape sequences that are safe to print.) + +It is illegal to attempt to convert recursive objects (e.g. lists or +dictionaries that contain a reference to themselves, directly or +indirectly.) +\obindex{recursive} + +The built-in function \verb@repr()@ performs exactly the same +conversion in its argument as enclosing it in reverse quotes does. +The built-in function \verb@str()@ performs a similar but more +user-friendly conversion. +\bifuncindex{repr} +\bifuncindex{str} + +\section{Primaries} \label{primaries} +\index{primary} + +Primaries represent the most tightly bound operations of the language. +Their syntax is: + +\begin{verbatim} +primary: atom | attributeref | subscription | slicing | call +\end{verbatim} + +\subsection{Attribute references} +\indexii{attribute}{reference} + +An attribute reference is a primary followed by a period and a name: + +\begin{verbatim} +attributeref: primary "." identifier +\end{verbatim} + +The primary must evaluate to an object of a type that supports +attribute references, e.g. a module or a list. 
This object is then +asked to produce the attribute whose name is the identifier. If this +attribute is not available, the exception \verb@AttributeError@ is +raised. Otherwise, the type and value of the object produced is +determined by the object. Multiple evaluations of the same attribute +reference may yield different objects. +\obindex{module} +\obindex{list} + +\subsection{Subscriptions} +\index{subscription} + +A subscription selects an item of a sequence (string, tuple or list) +or mapping (dictionary) object: +\obindex{sequence} +\obindex{mapping} +\obindex{string} +\obindex{tuple} +\obindex{list} +\obindex{dictionary} +\indexii{sequence}{item} + +\begin{verbatim} +subscription: primary "[" condition "]" +\end{verbatim} + +The primary must evaluate to an object of a sequence or mapping type. + +If it is a mapping, the condition must evaluate to an object whose +value is one of the keys of the mapping, and the subscription selects +the value in the mapping that corresponds to that key. + +If it is a sequence, the condition must evaluate to a plain integer. +If this value is negative, the length of the sequence is added to it +(so that, e.g. \verb@x[-1]@ selects the last item of \verb@x@.) +The resulting value must be a nonnegative integer smaller than the +number of items in the sequence, and the subscription selects the item +whose index is that value (counting from zero). + +A string's items are characters. A character is not a separate data +type but a string of exactly one character. +\index{character} +\indexii{string}{item} + +\subsection{Slicings} +\index{slicing} +\index{slice} + +A slicing (or slice) selects a range of items in a sequence (string, +tuple or list) object: +\obindex{sequence} +\obindex{string} +\obindex{tuple} +\obindex{list} + +\begin{verbatim} +slicing: primary "[" [condition] ":" [condition] "]" +\end{verbatim} + +The primary must evaluate to a sequence object. 
The lower and upper +bound expressions, if present, must evaluate to plain integers; +defaults are zero and the sequence's length, respectively. If either +bound is negative, the sequence's length is added to it. The slicing +now selects all items with index \var{k} such that +\code{\var{i} <= \var{k} < \var{j}} where \var{i} +and \var{j} are the specified lower and upper bounds. This may be an +empty sequence. It is not an error if \var{i} or \var{j} lie outside the +range of valid indexes (such items don't exist so they aren't +selected). + +\subsection{Calls} \label{calls} +\index{call} + +A call calls a callable object (e.g. a function) with a possibly empty +series of arguments:\footnote{The new syntax for keyword arguments is +not yet documented in this manual. See chapter 12 of the Tutorial.} +\obindex{callable} + +\begin{verbatim} +call: primary "(" [condition_list] ")" +\end{verbatim} + +The primary must evaluate to a callable object (user-defined +functions, built-in functions, methods of built-in objects, class +objects, and methods of class instances are callable). If it is a +class, the argument list must be empty; otherwise, the arguments are +evaluated. + +A call always returns some value, possibly \verb@None@, unless it +raises an exception. How this value is computed depends on the type +of the callable object. If it is: + +\begin{description} + +\item[a user-defined function:] the code block for the function is +executed, passing it the argument list. The first thing the code +block will do is bind the formal parameters to the arguments; this is +described in section \ref{function}. When the code block executes a +\verb@return@ statement, this specifies the return value of the +function call. 
+\indexii{function}{call} +\indexiii{user-defined}{function}{call} +\obindex{user-defined function} +\obindex{function} + +\item[a built-in function or method:] the result is up to the +interpreter; see the library reference manual for the descriptions of +built-in functions and methods. +\indexii{function}{call} +\indexii{built-in function}{call} +\indexii{method}{call} +\indexii{built-in method}{call} +\obindex{built-in method} +\obindex{built-in function} +\obindex{method} +\obindex{function} + +\item[a class object:] a new instance of that class is returned. +\obindex{class} +\indexii{class object}{call} + +\item[a class instance method:] the corresponding user-defined +function is called, with an argument list that is one longer than the +argument list of the call: the instance becomes the first argument. +\obindex{class instance} +\obindex{instance} +\indexii{instance}{call} +\indexii{class instance}{call} + +\end{description} + +\section{Unary arithmetic operations} +\indexiii{unary}{arithmetic}{operation} +\indexiii{unary}{bit-wise}{operation} + +All unary arithmetic (and bit-wise) operations have the same priority: + +\begin{verbatim} +u_expr: primary | "-" u_expr | "+" u_expr | "~" u_expr +\end{verbatim} + +The unary \verb@"-"@ (minus) operator yields the negation of its +numeric argument. +\index{negation} +\index{minus} + +The unary \verb@"+"@ (plus) operator yields its numeric argument +unchanged. +\index{plus} + +The unary \verb@"~"@ (invert) operator yields the bit-wise inversion +of its plain or long integer argument. The bit-wise inversion of +\verb@x@ is defined as \verb@-(x+1)@. +\index{inversion} + +In all three cases, if the argument does not have the proper type, +a \verb@TypeError@ exception is raised. +\exindex{TypeError} + +\section{Binary arithmetic operations} +\indexiii{binary}{arithmetic}{operation} + +The binary arithmetic operations have the conventional priority +levels. 
Note that some of these operations also apply to certain +non-numeric types. There is no ``power'' operator, so there are only +two levels, one for multiplicative operators and one for additive +operators: + +\begin{verbatim} +m_expr: u_expr | m_expr "*" u_expr + | m_expr "/" u_expr | m_expr "%" u_expr +a_expr: m_expr | a_expr "+" m_expr | a_expr "-" m_expr +\end{verbatim} + +The \verb@"*"@ (multiplication) operator yields the product of its +arguments. The arguments must either both be numbers, or one argument +must be a plain integer and the other must be a sequence. In the +former case, the numbers are converted to a common type and then +multiplied together. In the latter case, sequence repetition is +performed; a negative repetition factor yields an empty sequence. +\index{multiplication} + +The \verb@"/"@ (division) operator yields the quotient of its +arguments. The numeric arguments are first converted to a common +type. Plain or long integer division yields an integer of the same +type; the result is that of mathematical division with the `floor' +function applied to the result. Division by zero raises the +\verb@ZeroDivisionError@ exception. +\exindex{ZeroDivisionError} +\index{division} + +The \verb@"%"@ (modulo) operator yields the remainder from the +division of the first argument by the second. The numeric arguments +are first converted to a common type. A zero right argument raises +the \verb@ZeroDivisionError@ exception. The arguments may be floating +point numbers, e.g. \verb@3.14 % 0.7@ equals \verb@0.34@. The modulo +operator always yields a result with the same sign as its second +operand (or zero); the absolute value of the result is strictly +smaller than the absolute value of the second operand. +\index{modulo} + +The integer division and modulo operators are connected by the +following identity: \verb@x == (x/y)*y + (x%y)@. Integer division and +modulo are also connected with the built-in function \verb@divmod()@: +\verb@divmod(x, y) == (x/y, x%y)@. 
These identities don't hold for +floating point numbers; there, a similar identity holds where +\verb@x/y@ is replaced by \verb@floor(x/y)@. + +The \verb@"+"@ (addition) operator yields the sum of its arguments. +The arguments must either both be numbers, or both sequences of the +same type. In the former case, the numbers are converted to a common +type and then added together. In the latter case, the sequences are +concatenated. +\index{addition} + +The \verb@"-"@ (subtraction) operator yields the difference of its +arguments. The numeric arguments are first converted to a common +type. +\index{subtraction} + +\section{Shifting operations} +\indexii{shifting}{operation} + +The shifting operations have lower priority than the arithmetic +operations: + +\begin{verbatim} +shift_expr: a_expr | shift_expr ( "<<" | ">>" ) a_expr +\end{verbatim} + +These operators accept plain or long integers as arguments. The +arguments are converted to a common type. They shift the first +argument to the left or right by the number of bits given by the +second argument. + +A right shift by \var{n} bits is defined as division by +\code{pow(2,\var{n})}. A left shift by \var{n} bits is defined as +multiplication with \code{pow(2,\var{n})}; for plain integers there is +no overflow check so this drops bits and flips the sign if the result +is not less than \code{pow(2,31)} in absolute value. + +Negative shift counts raise a \verb@ValueError@ exception. +\exindex{ValueError} + +\section{Binary bit-wise operations} +\indexiii{binary}{bit-wise}{operation} + +Each of the three bitwise operations has a different priority level: + +\begin{verbatim} +and_expr: shift_expr | and_expr "&" shift_expr +xor_expr: and_expr | xor_expr "^" and_expr +or_expr: xor_expr | or_expr "|" xor_expr +\end{verbatim} + +The \verb@"&"@ operator yields the bitwise AND of its arguments, which +must be plain or long integers. The arguments are converted to a +common type. 
+\indexii{bit-wise}{and} + +The \verb@"^"@ operator yields the bitwise XOR (exclusive OR) of its +arguments, which must be plain or long integers. The arguments are +converted to a common type. +\indexii{bit-wise}{xor} +\indexii{exclusive}{or} + +The \verb@"|"@ operator yields the bitwise (inclusive) OR of its +arguments, which must be plain or long integers. The arguments are +converted to a common type. +\indexii{bit-wise}{or} +\indexii{inclusive}{or} + +\section{Comparisons} +\index{comparison} + +Contrary to C, all comparison operations in Python have the same +priority, which is lower than that of any arithmetic, shifting or +bitwise operation. Also contrary to C, expressions like +\verb@a < b < c@ have the interpretation that is conventional in +mathematics: +\index{C} + +\begin{verbatim} +comparison: or_expr (comp_operator or_expr)* +comp_operator: "<"|">"|"=="|">="|"<="|"<>"|"!="|"is" ["not"]|["not"] "in" +\end{verbatim} + +Comparisons yield integer values: 1 for true, 0 for false. + +Comparisons can be chained arbitrarily, e.g. \code{x < y <= z} is +equivalent to \code{x < y and y <= z}, except that \code{y} is +evaluated only once (but in both cases \code{z} is not evaluated at all +when \code{x < y} is found to be false). +\indexii{chaining}{comparisons} + +Formally, if \var{a}, \var{b}, \var{c}, \ldots, \var{y}, \var{z} are +expressions and \var{opa}, \var{opb}, \ldots, \var{opy} are comparison +operators, then \var{a opa b opb c} \ldots \var{y opy z} is equivalent +to \var{a opa b} \code{and} \var{b opb c} \code{and} \ldots \code{and} +\var{y opy z}, except that each expression is evaluated at most once. + +Note that \var{a opa b opb c} doesn't imply any kind of comparison +between \var{a} and \var{c}, so that e.g.\ \code{x < y > z} is +perfectly legal (though perhaps not pretty). + +The forms \verb@<>@ and \verb@!=@ are equivalent; for consistency with +C, \verb@!=@ is preferred; where \verb@!=@ is mentioned below +\verb@<>@ is also implied. 
+ +The operators {\tt "<", ">", "==", ">=", "<="}, and {\tt "!="} compare +the values of two objects. The objects needn't have the same type. +If both are numbers, they are converted to a common type. Otherwise, +objects of different types {\em always} compare unequal, and are +ordered consistently but arbitrarily. + +(This unusual definition of comparison is done to simplify the +definition of operations like sorting and the \verb@in@ and +\verb@not@ \verb@in@ operators.) + +Comparison of objects of the same type depends on the type: + +\begin{itemize} + +\item +Numbers are compared arithmetically. + +\item +Strings are compared lexicographically using the numeric equivalents +(the result of the built-in function \verb@ord@) of their characters. + +\item +Tuples and lists are compared lexicographically using comparison of +corresponding items. + +\item +Mappings (dictionaries) are compared through lexicographic +comparison of their sorted (key, value) lists.% +\footnote{This is expensive since it requires sorting the keys first, +but about the only sensible definition. An earlier version of Python +compared dictionaries by identity only, but this caused surprises +because people expected to be able to test a dictionary for emptiness +by comparing it to {\tt \{\}}.} + +\item +Most other types compare unequal unless they are the same object; +the choice whether one object is considered smaller or larger than +another one is made arbitrarily but consistently within one +execution of a program. + +\end{itemize} + +The operators \verb@in@ and \verb@not in@ test for sequence +membership: if \var{y} is a sequence, \code{\var{x} in \var{y}} is +true if and only if there exists an index \var{i} such that +\code{\var{x} == \var{y}[\var{i}]}. +\code{\var{x} not in \var{y}} yields the inverse truth value. 
The +exception \verb@TypeError@ is raised when \var{y} is not a sequence, +or when \var{y} is a string and \var{x} is not a string of length one.% +\footnote{The latter restriction is sometimes a nuisance.} +\opindex{in} +\opindex{not in} +\indexii{membership}{test} +\obindex{sequence} + +The operators \verb@is@ and \verb@is not@ test for object identity: +\var{x} \code{is} \var{y} is true if and only if \var{x} and \var{y} +are the same object. \var{x} \code{is not} \var{y} yields the inverse +truth value. +\opindex{is} +\opindex{is not} +\indexii{identity}{test} + +\section{Boolean operations} \label{Booleans} +\indexii{Boolean}{operation} + +Boolean operations have the lowest priority of all Python operations: + +\begin{verbatim} +condition: or_test | lambda_form +or_test: and_test | or_test "or" and_test +and_test: not_test | and_test "and" not_test +not_test: comparison | "not" not_test +lambda_form: "lambda" [parameter_list]: condition +\end{verbatim} + +In the context of Boolean operations, and also when conditions are +used by control flow statements, the following values are interpreted +as false: \verb@None@, numeric zero of all types, empty sequences +(strings, tuples and lists), and empty mappings (dictionaries). All +other values are interpreted as true. + +The operator \verb@not@ yields 1 if its argument is false, 0 otherwise. +\opindex{not} + +The condition \var{x} \verb@and@ \var{y} first evaluates \var{x}; if +\var{x} is false, its value is returned; otherwise, \var{y} is +evaluated and the resulting value is returned. +\opindex{and} + +The condition \var{x} \verb@or@ \var{y} first evaluates \var{x}; if +\var{x} is true, its value is returned; otherwise, \var{y} is +evaluated and the resulting value is returned. +\opindex{or} + +(Note that \verb@and@ and \verb@or@ do not restrict the value and type +they return to 0 and 1, but rather return the last evaluated argument. +This is sometimes useful, e.g. 
if \verb@s@ is a string that should be +replaced by a default value if it is empty, the expression +\verb@s or 'foo'@ yields the desired value. Because \verb@not@ has to +invent a value anyway, it does not bother to return a value of the +same type as its argument, so e.g. \verb@not 'foo'@ yields \verb@0@, +not \verb@''@.) + +Lambda forms (lambda expressions) have the same syntactic position as +conditions. They are a shorthand to create anonymous functions; the +expression {\em {\tt lambda} arguments{\tt :} condition} +yields a function object that behaves virtually identically to one +defined with +{\em {\tt def} name {\tt (}arguments{\tt ): return} condition}. +See section \ref{function} for the syntax of +parameter lists. Note that functions created with lambda forms cannot +contain statements. +\label{lambda} +\indexii{lambda}{expression} +\indexii{lambda}{form} +\indexii{anonymous}{function} + +\section{Expression lists and condition lists} +\indexii{expression}{list} +\indexii{condition}{list} + +\begin{verbatim} +expression_list: or_expr ("," or_expr)* [","] +condition_list: condition ("," condition)* [","] +\end{verbatim} + +The only difference between expression lists and condition lists is +the lowest priority of operators that can be used in them without +being enclosed in parentheses; condition lists allow all operators, +while expression lists don't allow comparisons and Boolean operators +(they do allow bitwise and shift operators though). + +Expression lists are used in expression statements and assignments; +condition lists are used everywhere else where a list of +comma-separated values is required. + +An expression (condition) list containing at least one comma yields a +tuple. The length of the tuple is the number of expressions +(conditions) in the list. The expressions (conditions) are evaluated +from left to right. 
(Condition lists are used syntactically in a few +places where no tuple is constructed but a list of values is needed +nevertheless.) +\obindex{tuple} + +The trailing comma is required only to create a single tuple (a.k.a. a +{\em singleton}); it is optional in all other cases. A single +expression (condition) without a trailing comma doesn't create a +tuple, but rather yields the value of that expression (condition). +\indexii{trailing}{comma} + +(To create an empty tuple, use an empty pair of parentheses: +\verb@()@.) + +\section{Summary} + +The following table summarizes the operator precedences in Python, +from lowest precedence (least binding) to highest precedence (most +binding). Operators in the same box have the same precedence. Unless +the syntax is explicitly given, operators are binary. Operators in +the same box group left to right (except for comparisons, which +chain from left to right --- see above). + +\begin{center} +\begin{tabular}{|c|c|} +\hline +\code{or} & Boolean OR \\ +\hline +\code{and} & Boolean AND \\ +\hline +\code{not} \var{x} & Boolean NOT \\ +\hline +\code{in}, \code{not} \code{in} & Membership tests \\ +\code{is}, \code{is} \code{not} & Identity tests \\ +\code{<}, \code{<=}, \code{>}, \code{>=}, \code{<>}, \code{!=}, \code{==} & + Comparisons \\ +\hline +\code{|} & Bitwise OR \\ +\hline +\code{\^} & Bitwise XOR \\ +\hline +\code{\&} & Bitwise AND \\ +\hline +\code{<<}, \code{>>} & Shifts \\ +\hline +\code{+}, \code{-} & Addition and subtraction \\ +\hline +\code{*}, \code{/}, \code{\%} & Multiplication, division, remainder \\ +\hline +\code{+\var{x}}, \code{-\var{x}} & Positive, negative \\ +\code{\~\var{x}} & Bitwise not \\ +\hline +\code{\var{x}.\var{attribute}} & Attribute reference \\ +\code{\var{x}[\var{index}]} & Subscription \\ +\code{\var{x}[\var{index}:\var{index}]} & Slicing \\ +\code{\var{f}(\var{arguments}...)} & Function call \\ +\hline +\code{(\var{expressions}\ldots)} & Binding or tuple display \\ 
+\code{[\var{expressions}\ldots]} & List display \\ +\code{\{\var{key}:\var{datum}\ldots\}} & Dictionary display \\ +\code{`\var{expression}\ldots`} & String conversion \\ +\hline +\end{tabular} +\end{center} diff --git a/Doc/ref/ref6.tex b/Doc/ref/ref6.tex new file mode 100644 index 0000000..20c65f2 --- /dev/null +++ b/Doc/ref/ref6.tex @@ -0,0 +1,512 @@ +\chapter{Simple statements} +\indexii{simple}{statement} + +Simple statements are comprised within a single logical line. +Several simple statements may occur on a single line separated +by semicolons. The syntax for simple statements is: + +\begin{verbatim} +simple_stmt: expression_stmt + | assignment_stmt + | pass_stmt + | del_stmt + | print_stmt + | return_stmt + | raise_stmt + | break_stmt + | continue_stmt + | import_stmt + | global_stmt + | exec_stmt +\end{verbatim} + +\section{Expression statements} +\indexii{expression}{statement} + +Expression statements are used (mostly interactively) to compute and +write a value, or (usually) to call a procedure (a function that +returns no meaningful result; in Python, procedures return the value +\code{None}): + +\begin{verbatim} +expression_stmt: condition_list +\end{verbatim} + +An expression statement evaluates the condition list (which may be a +single condition). +\indexii{expression}{list} + +In interactive mode, if the value is not \code{None}, it is converted +to a string using the rules for string conversions (expressions in +reverse quotes), and the resulting string is written to standard +output (see section \ref{print}) on a line by itself. +(The exception for \code{None} is made so that procedure calls, which +are syntactically equivalent to expressions, do not cause any output.) 
+\ttindex{None} +\indexii{string}{conversion} +\index{output} +\indexii{standard}{output} +\indexii{writing}{values} +\indexii{procedure}{call} + +\section{Assignment statements} +\indexii{assignment}{statement} + +Assignment statements are used to (re)bind names to values and to +modify attributes or items of mutable objects: +\indexii{binding}{name} +\indexii{rebinding}{name} +\obindex{mutable} +\indexii{attribute}{assignment} + +\begin{verbatim} +assignment_stmt: (target_list "=")+ expression_list +target_list: target ("," target)* [","] +target: identifier | "(" target_list ")" | "[" target_list "]" + | attributeref | subscription | slicing +\end{verbatim} + +(See section \ref{primaries} for the syntax definitions for the last +three symbols.) + +An assignment statement evaluates the expression list (remember that +this can be a single expression or a comma-separated list, the latter +yielding a tuple) and assigns the single resulting object to each of +the target lists, from left to right. +\indexii{expression}{list} + +Assignment is defined recursively depending on the form of the target +(list). When a target is part of a mutable object (an attribute +reference, subscription or slicing), the mutable object must +ultimately perform the assignment and decide about its validity, and +may raise an exception if the assignment is unacceptable. The rules +observed by various types and the exceptions raised are given with the +definition of the object types (see section \ref{types}). +\index{target} +\indexii{target}{list} + +Assignment of an object to a target list is recursively defined as +follows. +\indexiii{target}{list}{assignment} + +\begin{itemize} +\item +If the target list is a single target: the object is assigned to that +target. 
+ +\item +If the target list is a comma-separated list of targets: the object +must be a tuple with the same number of items as the list contains +targets, and the items are assigned, from left to right, to the +corresponding targets. + +\end{itemize} + +Assignment of an object to a single target is recursively defined as +follows. + +\begin{itemize} % nested + +\item +If the target is an identifier (name): + +\begin{itemize} + +\item +If the name does not occur in a \keyword{global} statement in the current +code block: the name is bound to the object in the current local name +space. +\stindex{global} + +\item +Otherwise: the name is bound to the object in the current global name +space. + +\end{itemize} % nested + +The name is rebound if it was already bound. + +\item +If the target is a target list enclosed in parentheses: the object is +assigned to that target list as described above. + +\item +If the target is a target list enclosed in square brackets: the object +must be a list with the same number of items as the target list +contains targets, and its items are assigned, from left to right, to +the corresponding targets. + +\item +If the target is an attribute reference: The primary expression in the +reference is evaluated. It should yield an object with assignable +attributes; if this is not the case, \exception{TypeError} is raised. That +object is then asked to assign the assigned object to the given +attribute; if it cannot perform the assignment, it raises an exception +(usually but not necessarily \exception{AttributeError}). +\indexii{attribute}{assignment} + +\item +If the target is a subscription: The primary expression in the +reference is evaluated. It should yield either a mutable sequence +(list) object or a mapping (dictionary) object. Next, the subscript +expression is evaluated. +\indexii{subscription}{assignment} +\obindex{mutable} + +If the primary is a mutable sequence object (a list), the subscript +must yield a plain integer. 
If it is negative, the sequence's length +is added to it. The resulting value must be a nonnegative integer +less than the sequence's length, and the sequence is asked to assign +the assigned object to its item with that index. If the index is out +of range, \exception{IndexError} is raised (assignment to a subscripted +sequence cannot add new items to a list). +\obindex{sequence} +\obindex{list} + +If the primary is a mapping (dictionary) object, the subscript must +have a type compatible with the mapping's key type, and the mapping is +then asked to create a key/datum pair which maps the subscript to +the assigned object. This can either replace an existing key/value +pair with the same key value, or insert a new key/value pair (if no +key with the same value existed). +\obindex{mapping} +\obindex{dictionary} + +\item +If the target is a slicing: The primary expression in the reference is +evaluated. It should yield a mutable sequence object (e.g. a list). The +assigned object should be a sequence object of the same type. Next, +the lower and upper bound expressions are evaluated, insofar they are +present; defaults are zero and the sequence's length. The bounds +should evaluate to (small) integers. If either bound is negative, the +sequence's length is added to it. The resulting bounds are clipped to +lie between zero and the sequence's length, inclusive. Finally, the +sequence object is asked to replace the slice with the items of the +assigned sequence. The length of the slice may be different from the +length of the assigned sequence, thus changing the length of the +target sequence, if the object allows it. +\indexii{slicing}{assignment} + +\end{itemize} + +(In the current implementation, the syntax for targets is taken +to be the same as for expressions, and invalid syntax is rejected +during the code generation phase, causing less detailed error +messages.) 
+ +WARNING: Although the definition of assignment implies that overlaps +between the left-hand side and the right-hand side are `safe' (e.g. +\verb@a, b = b, a@ swaps two variables), overlaps within the +collection of assigned-to variables are not safe! For instance, the +following program prints \code{[0, 2]}: + +\begin{verbatim} +x = [0, 1] +i = 0 +i, x[i] = 1, 2 +print x +\end{verbatim} + + +\section{The {\tt pass} statement} +\stindex{pass} + +\begin{verbatim} +pass_stmt: "pass" +\end{verbatim} + +\keyword{pass} is a null operation --- when it is executed, nothing +happens. It is useful as a placeholder when a statement is +required syntactically, but no code needs to be executed, for example: +\indexii{null}{operation} + +\begin{verbatim} +def f(arg): pass # a function that does nothing (yet) + +class C: pass # a class with no methods (yet) +\end{verbatim} + +\section{The {\tt del} statement} +\stindex{del} + +\begin{verbatim} +del_stmt: "del" target_list +\end{verbatim} + +Deletion is recursively defined very similar to the way assignment is +defined. Rather than spelling it out in full details, here are some +hints. +\indexii{deletion}{target} +\indexiii{deletion}{target}{list} + +Deletion of a target list recursively deletes each target, from left +to right. + +Deletion of a name removes the binding of that name (which must exist) +from the local or global name space, depending on whether the name +occurs in a \keyword{global} statement in the same code block. +\stindex{global} +\indexii{unbinding}{name} + +Deletion of attribute references, subscriptions and slicings +is passed to the primary object involved; deletion of a slicing +is in general equivalent to assignment of an empty slice of the +right type (but even this is determined by the sliced object).
+\indexii{attribute}{deletion} + +\section{The {\tt print} statement} \label{print} +\stindex{print} + +\begin{verbatim} +print_stmt: "print" [ condition ("," condition)* [","] ] +\end{verbatim} + +\keyword{print} evaluates each condition in turn and writes the resulting +object to standard output (see below). If an object is not a string, +it is first converted to a string using the rules for string +conversions. The (resulting or original) string is then written. A +space is written before each object is (converted and) written, unless +the output system believes it is positioned at the beginning of a +line. This is the case: (1) when no characters have yet been written +to standard output; or (2) when the last character written to standard +output is \verb/\n/; or (3) when the last write operation on standard +output was not a \keyword{print} statement. (In some cases it may be +functional to write an empty string to standard output for this +reason.) +\index{output} +\indexii{writing}{values} + +A \verb/"\n"/ character is written at the end, unless the \keyword{print} +statement ends with a comma. This is the only action if the statement +contains just the keyword \keyword{print}. +\indexii{trailing}{comma} +\indexii{newline}{suppression} + +Standard output is defined as the file object named \verb@stdout@ +in the built-in module \verb@sys@. If no such object exists, +or if it is not a writable file, a \exception{RuntimeError} exception is raised. +(The original implementation attempts to write to the system's original +standard output instead, but this is not safe, and should be fixed.) +\indexii{standard}{output} +\refbimodindex{sys} +\ttindex{stdout} +\exindex{RuntimeError} + +\section{The {\tt return} statement} +\stindex{return} + +\begin{verbatim} +return_stmt: "return" [condition_list] +\end{verbatim} + +\keyword{return} may only occur syntactically nested in a function +definition, not within a nested class definition. 
+\indexii{function}{definition} +\indexii{class}{definition} + +If a condition list is present, it is evaluated, else \code{None} +is substituted. + +\keyword{return} leaves the current function call with the condition +list (or \code{None}) as return value. + +When \keyword{return} passes control out of a \keyword{try} statement +with a finally clause, that finally clause is executed +before really leaving the function. +\kwindex{finally} + +\section{The {\tt raise} statement} +\stindex{raise} + +\begin{verbatim} +raise_stmt: "raise" condition ["," condition ["," condition]] +\end{verbatim} + +\keyword{raise} evaluates its first condition, which must yield +a string, class, or instance object. If there is a second condition, +this is evaluated, else \code{None} is substituted. If the first +condition is a class object, then the second condition must be an +instance of that class or one of its derivatives. If the first +condition is an instance object, the second condition must be +\code{None}. +\index{exception} +\indexii{raising}{exception} + +If the first object is a class or string, it then raises the exception +identified by the first object, with the second one (or \code{None}) +as its parameter. If the first object is an instance, it raises the +exception identified by the class of the object, with the instance as +its parameter (and there should be no second object, or the second +object should be \code{None}). + +If a third object is present, and it is not \code{None}, it should be +a traceback object (see section \ref{traceback}), and it is +substituted instead of the current location as the place where the +exception occurred. This is useful to re-raise an exception +transparently in an except clause.
+\obindex{traceback} + +\section{The {\tt break} statement} +\stindex{break} + +\begin{verbatim} +break_stmt: "break" +\end{verbatim} + +\keyword{break} may only occur syntactically nested in a \keyword{for} +or \keyword{while} loop, but not nested in a function or class definition +within that loop. +\stindex{for} +\stindex{while} +\indexii{loop}{statement} + +It terminates the nearest enclosing loop, skipping the optional +else clause if the loop has one. +\kwindex{else} + +If a \keyword{for} loop is terminated by \keyword{break}, the loop control +target keeps its current value. +\indexii{loop control}{target} + +When \keyword{break} passes control out of a \keyword{try} statement +with a finally clause, that finally clause is executed +before really leaving the loop. +\kwindex{finally} + +\section{The {\tt continue} statement} +\stindex{continue} + +\begin{verbatim} +continue_stmt: "continue" +\end{verbatim} + +\keyword{continue} may only occur syntactically nested in a \keyword{for} or +\keyword{while} loop, but not nested in a function or class definition or +\keyword{try} statement within that loop.\footnote{Except that it may +currently occur within an {\tt except} clause.} +\stindex{for} +\stindex{while} +\indexii{loop}{statement} +\kwindex{finally} + +It continues with the next cycle of the nearest enclosing loop. + +\section{The {\tt import} statement} \label{import} +\stindex{import} + +\begin{verbatim} +import_stmt: "import" identifier ("," identifier)* + | "from" identifier "import" identifier ("," identifier)* + | "from" identifier "import" "*" +\end{verbatim} + +Import statements are executed in two steps: (1) find a module, and +initialize it if necessary; (2) define a name or names in the local +name space (of the scope where the \keyword{import} statement occurs). 
+The first form (without \keyword{from}) repeats these steps for each +identifier in the list, the \keyword{from} form performs them once, with +the first identifier specifying the module name. +\indexii{importing}{module} +\indexii{name}{binding} +\kwindex{from} + +The system maintains a table of modules that have been initialized, +indexed by module name. (The current implementation makes this table +accessible as \code{sys.modules}.) When a module name is found in +this table, step (1) is finished. If not, a search for a module +definition is started. This first looks for a built-in module +definition, and if no built-in module of the given name is found, it +searches a user-specified list of directories for a file whose name is +the module name with extension \file{.py}. (The current +implementation uses the list of strings \code{sys.path} as the search +path; it is initialized from the shell environment variable +\envvar{PYTHONPATH}, with an installation-dependent default.) +\ttindex{modules} +\ttindex{sys.modules} +\indexii{module}{name} +\indexii{built-in}{module} +\indexii{user-defined}{module} +\refbimodindex{sys} +\ttindex{path} +\ttindex{sys.path} +\indexii{filename}{extension} + +If a built-in module is found, its built-in initialization code is +executed and step (1) is finished. If no matching file is found, +\exception{ImportError} is raised. If a file is found, it is parsed, +yielding an executable code block. If a syntax error occurs, +\exception{SyntaxError} is raised. Otherwise, an empty module of the given +name is created and inserted in the module table, and then the code +block is executed in the context of this module. Exceptions during +this execution terminate step (1). +\indexii{module}{initialization} +\exindex{SyntaxError} +\exindex{ImportError} +\index{code block} + +When step (1) finishes without raising an exception, step (2) can +begin.
+ +The first form of \keyword{import} statement binds the module name in the +local name space to the module object, and then goes on to import the +next identifier, if any. The \keyword{from} form does not bind the +module name: it goes through the list of identifiers, looks each one +of them up in the module found in step (1), and binds the name in the +local name space to the object thus found. If a name is not found, +\exception{ImportError} is raised. If the list of identifiers is replaced +by a star (\verb@*@), all names defined in the module are bound, +except those beginning with an underscore (\verb@_@). +\indexii{name}{binding} +\exindex{ImportError} + +Names bound by import statements may not occur in \keyword{global} +statements in the same scope. +\stindex{global} + +The \keyword{from} form with \verb@*@ may only occur in a module scope. +\kwindex{from} +\ttindex{from ... import *} + +(The current implementation does not enforce the latter two +restrictions, but programs should not abuse this freedom, as future +implementations may enforce them or silently change the meaning of the +program.) + +\section{The {\tt global} statement} \label{global} +\stindex{global} + +\begin{verbatim} +global_stmt: "global" identifier ("," identifier)* +\end{verbatim} + +The \keyword{global} statement is a declaration which holds for the +entire current code block. It means that the listed identifiers are to be +interpreted as globals. While {\em using} global names is automatic +if they are not defined in the local scope, {\em assigning} to global +names would be impossible without \keyword{global}. +\indexiii{global}{name}{binding} + +Names listed in a \keyword{global} statement must not be used in the same +code block before that \keyword{global} statement is executed.
+ +Names listed in a \keyword{global} statement must not be defined as formal +parameters or in a \keyword{for} loop control target, \keyword{class} +definition, function definition, or \keyword{import} statement. + +(The current implementation does not enforce the latter two +restrictions, but programs should not abuse this freedom, as future +implementations may enforce them or silently change the meaning of the +program.) + +Note: the \keyword{global} is a directive to the parser. Therefore, it +applies only to code parsed at the same time as the \keyword{global} +statement. In particular, a \keyword{global} statement contained in an +\keyword{exec} statement does not affect the code block {\em containing} +the \keyword{exec} statement, and code contained in an \keyword{exec} +statement is unaffected by \keyword{global} statements in the code +containing the \keyword{exec} statement. The same applies to the +\function{eval()}, \function{execfile()} and \function{compile()} functions. +\stindex{exec} +\bifuncindex{eval} +\bifuncindex{execfile} +\bifuncindex{compile} diff --git a/Doc/ref/ref7.tex b/Doc/ref/ref7.tex new file mode 100644 index 0000000..f5b8a0e --- /dev/null +++ b/Doc/ref/ref7.tex @@ -0,0 +1,391 @@ +\chapter{Compound statements} +\indexii{compound}{statement} + +Compound statements contain (groups of) other statements; they affect +or control the execution of those other statements in some way. In +general, compound statements span multiple lines, although in simple +incarnations a whole compound statement may be contained in one line. + +The \verb@if@, \verb@while@ and \verb@for@ statements implement +traditional control flow constructs. \verb@try@ specifies exception +handlers and/or cleanup code for a group of statements. Function and +class definitions are also syntactically compound statements. + +Compound statements consist of one or more `clauses'. A clause +consists of a header and a `suite'. 
The clause headers of a +particular compound statement are all at the same indentation level. +Each clause header begins with a uniquely identifying keyword and ends +with a colon. A suite is a group of statements controlled by a +clause. A suite can be one or more semicolon-separated simple +statements on the same line as the header, following the header's +colon, or it can be one or more indented statements on subsequent +lines. Only the latter form of suite can contain nested compound +statements; the following is illegal, mostly because it wouldn't be +clear to which \verb@if@ clause a following \verb@else@ clause would +belong: +\index{clause} +\index{suite} + +\begin{verbatim} +if test1: if test2: print x +\end{verbatim} + +Also note that the semicolon binds tighter than the colon in this +context, so that in the following example, either all or none of the +\verb@print@ statements are executed: + +\begin{verbatim} +if x < y < z: print x; print y; print z +\end{verbatim} + +Summarizing: + +\begin{verbatim} +compound_stmt: if_stmt | while_stmt | for_stmt + | try_stmt | funcdef | classdef +suite: stmt_list NEWLINE | NEWLINE INDENT statement+ DEDENT +statement: stmt_list NEWLINE | compound_stmt +stmt_list: simple_stmt (";" simple_stmt)* [";"] +\end{verbatim} + +Note that statements always end in a \verb@NEWLINE@ possibly followed +by a \verb@DEDENT@. +\index{NEWLINE token} +\index{DEDENT token} + +Also note that optional continuation clauses always begin with a +keyword that cannot start a statement, thus there are no ambiguities +(the `dangling \verb@else@' problem is solved in Python by requiring +nested \verb@if@ statements to be indented). +\indexii{dangling}{else} + +The formatting of the grammar rules in the following sections places +each clause on a separate line for clarity. 
+ +\section{The {\tt if} statement} +\stindex{if} + +The \verb@if@ statement is used for conditional execution: + +\begin{verbatim} +if_stmt: "if" condition ":" suite + ("elif" condition ":" suite)* + ["else" ":" suite] +\end{verbatim} + +It selects exactly one of the suites by evaluating the conditions one +by one until one is found to be true (see section \ref{Booleans} for +the definition of true and false); then that suite is executed (and no +other part of the \verb@if@ statement is executed or evaluated). If +all conditions are false, the suite of the \verb@else@ clause, if +present, is executed. +\kwindex{elif} +\kwindex{else} + +\section{The {\tt while} statement} +\stindex{while} +\indexii{loop}{statement} + +The \verb@while@ statement is used for repeated execution as long as a +condition is true: + +\begin{verbatim} +while_stmt: "while" condition ":" suite + ["else" ":" suite] +\end{verbatim} + +This repeatedly tests the condition and, if it is true, executes the +first suite; if the condition is false (which may be the first time it +is tested) the suite of the \verb@else@ clause, if present, is +executed and the loop terminates. +\kwindex{else} + +A \verb@break@ statement executed in the first suite terminates the +loop without executing the \verb@else@ clause's suite. A +\verb@continue@ statement executed in the first suite skips the rest +of the suite and goes back to testing the condition. +\stindex{break} +\stindex{continue} + +\section{The {\tt for} statement} +\stindex{for} +\indexii{loop}{statement} + +The \verb@for@ statement is used to iterate over the elements of a +sequence (string, tuple or list): +\obindex{sequence} + +\begin{verbatim} +for_stmt: "for" target_list "in" condition_list ":" suite + ["else" ":" suite] +\end{verbatim} + +The condition list is evaluated once; it should yield a sequence. The +suite is then executed once for each item in the sequence, in the +order of ascending indices. 
Each item in turn is assigned to the +target list using the standard rules for assignments, and then the +suite is executed. When the items are exhausted (which is immediately +when the sequence is empty), the suite in the \verb@else@ clause, if +present, is executed, and the loop terminates. +\kwindex{in} +\kwindex{else} +\indexii{target}{list} + +A \verb@break@ statement executed in the first suite terminates the +loop without executing the \verb@else@ clause's suite. A +\verb@continue@ statement executed in the first suite skips the rest +of the suite and continues with the next item, or with the \verb@else@ +clause if there was no next item. +\stindex{break} +\stindex{continue} + +The suite may assign to the variable(s) in the target list; this does +not affect the next item assigned to it. + +The target list is not deleted when the loop is finished, but if the +sequence is empty, it will not have been assigned to at all by the +loop. + +Hint: the built-in function \verb@range()@ returns a sequence of +integers suitable to emulate the effect of Pascal's +\verb@for i := a to b do@; +e.g. \verb@range(3)@ returns the list \verb@[0, 1, 2]@. +\bifuncindex{range} +\index{Pascal} + +{\bf Warning:} There is a subtlety when the sequence is being modified +by the loop (this can only occur for mutable sequences, i.e. lists). +An internal counter is used to keep track of which item is used next, +and this is incremented on each iteration. When this counter has +reached the length of the sequence the loop terminates. This means that +if the suite deletes the current (or a previous) item from the +sequence, the next item will be skipped (since it gets the index of +the current item which has already been treated). Likewise, if the +suite inserts an item in the sequence before the current item, the +current item will be treated again the next time through the loop. 
+This can lead to nasty bugs that can be avoided by making a temporary +copy using a slice of the whole sequence, e.g. +\index{loop!over mutable sequence} +\index{mutable sequence!loop over} + +\begin{verbatim} +for x in a[:]: + if x < 0: a.remove(x) +\end{verbatim} + +\section{The {\tt try} statement} \label{try} +\stindex{try} + +The \verb@try@ statement specifies exception handlers and/or cleanup +code for a group of statements: + +\begin{verbatim} +try_stmt: try_exc_stmt | try_fin_stmt +try_exc_stmt: "try" ":" suite + ("except" [condition ["," target]] ":" suite)+ + ["else" ":" suite] +try_fin_stmt: "try" ":" suite + "finally" ":" suite +\end{verbatim} + +There are two forms of \verb@try@ statement: \verb@try...except@ and +\verb@try...finally@. These forms cannot be mixed. + +The \verb@try...except@ form specifies one or more exception handlers +(the \verb@except@ clauses). When no exception occurs in the +\verb@try@ clause, no exception handler is executed. When an +exception occurs in the \verb@try@ suite, a search for an exception +handler is started. This inspects the except clauses in turn until +one is found that matches the exception. A condition-less except +clause, if present, must be last; it matches any exception. For an +except clause with a condition, that condition is evaluated, and the +clause matches the exception if the resulting object is ``compatible'' +with the exception. An object is compatible with an exception if it +is either the object that identifies the exception, or (for exceptions +that are classes) it is a base class of the exception, or it is a +tuple containing an item that is compatible with the exception. Note +that the object identities must match, i.e. it must be the same +object, not just an object with the same value. +\kwindex{except} + +If no except clause matches the exception, the search for an exception +handler continues in the surrounding code and on the invocation stack. 
+ +If the evaluation of a condition in the header of an except clause +raises an exception, the original search for a handler is cancelled +and a search starts for the new exception in the surrounding code and +on the call stack (it is treated as if the entire \verb@try@ statement +raised the exception). + +When a matching except clause is found, the exception's parameter is +assigned to the target specified in that except clause, if present, +and the except clause's suite is executed. When the end of this suite +is reached, execution continues normally after the entire try +statement. (This means that if two nested handlers exist for the same +exception, and the exception occurs in the try clause of the inner +handler, the outer handler will not handle the exception.) + +Before an except clause's suite is executed, details about the +exception are assigned to three variables in the \verb@sys@ module: +\verb@sys.exc_type@ receives the object identifying the exception; +\verb@sys.exc_value@ receives the exception's parameter; +\verb@sys.exc_traceback@ receives a traceback object (see section +\ref{traceback}) identifying the point in the program where the +exception occurred. +\refbimodindex{sys} +\ttindex{exc_type} +\ttindex{exc_value} +\ttindex{exc_traceback} +\obindex{traceback} + +The optional \verb@else@ clause is executed when no exception occurs +in the \verb@try@ clause. Exceptions in the \verb@else@ clause are +not handled by the preceding \verb@except@ clauses. +\kwindex{else} + +The \verb@try...finally@ form specifies a `cleanup' handler. The +\verb@try@ clause is executed. When no exception occurs, the +\verb@finally@ clause is executed. When an exception occurs in the +\verb@try@ clause, the exception is temporarily saved, the +\verb@finally@ clause is executed, and then the saved exception is +re-raised. 
If the \verb@finally@ clause raises another exception or +executes a \verb@return@, \verb@break@ or \verb@continue@ statement, +the saved exception is lost. +\kwindex{finally} + +When a \verb@return@ or \verb@break@ statement is executed in the +\verb@try@ suite of a \verb@try...finally@ statement, the +\verb@finally@ clause is also executed `on the way out'. A +\verb@continue@ statement is illegal in the \verb@try@ clause. (The +reason is a problem with the current implementation --- this +restriction may be lifted in the future). +\stindex{return} +\stindex{break} +\stindex{continue} + +\section{Function definitions} \label{function} +\indexii{function}{definition} + +A function definition defines a user-defined function object (see +section \ref{types}):\footnote{The new syntax to receive arbitrary +keyword arguments is not yet documented in this manual. See chapter +12 of the Tutorial.} +\obindex{user-defined function} +\obindex{function} + +\begin{verbatim} +funcdef: "def" funcname "(" [parameter_list] ")" ":" suite +parameter_list: (defparameter ",")* ("*" identifier [, "**" identifier] + | "**" identifier + | defparameter [","]) +defparameter: parameter ["=" condition] +sublist: parameter ("," parameter)* [","] +parameter: identifier | "(" sublist ")" +funcname: identifier +\end{verbatim} + +A function definition is an executable statement. Its execution binds +the function name in the current local name space to a function object +(a wrapper around the executable code for the function). This +function object contains a reference to the current global name space +as the global name space to be used when the function is called. +\indexii{function}{name} +\indexii{name}{binding} + +The function definition does not execute the function body; this gets +executed only when the function is called. + +When one or more top-level parameters have the form {\em parameter = +condition}, the function is said to have ``default parameter values''. 
+Default parameter values are evaluated when the function definition is +executed. For a parameter with a default value, the corresponding +argument may be omitted from a call, in which case the parameter's +default value is substituted. If a parameter has a default value, all +following parameters must also have a default value --- this is a +syntactic restriction that is not expressed by the grammar.% +\footnote{Currently this is not checked; instead, +{\tt def f(a=1,b)} is interpreted as {\tt def f(a=1,b=None)}.} +\indexiii{default}{parameter}{value} + +Function call semantics are described in section \ref{calls}. When a +user-defined function is called, first missing arguments for which a +default value exists are supplied; then the arguments (a.k.a. actual +parameters) are bound to the (formal) parameters, as follows: +\indexii{function}{call} +\indexiii{user-defined}{function}{call} +\index{parameter} +\index{argument} +\indexii{parameter}{formal} +\indexii{parameter}{actual} + +\begin{itemize} + +\item +If there are no formal parameters, there must be no arguments. + +\item +If the formal parameter list does not end in a star followed by an +identifier, there must be exactly as many arguments as there are +parameters in the formal parameter list (at the top level); the +arguments are assigned to the formal parameters one by one. Note that +the presence or absence of a trailing comma at the top level in either +the formal or the actual parameter list makes no difference. The +assignment to a formal parameter is performed as if the parameter +occurs on the left hand side of an assignment statement whose right +hand side's value is that of the argument. + +\item +If the formal parameter list ends in a star followed by an identifier, +preceded by zero or more comma-followed parameters, there must be at +least as many arguments as there are parameters preceding the star. +Call this number {\em N}.
The first {\em N} arguments are assigned to +the corresponding formal parameters in the way described above. A +tuple containing the remaining arguments, if any, is then assigned to +the identifier following the star. This variable will always be a +tuple: if there are no extra arguments, its value is \verb@()@, if +there is just one extra argument, it is a singleton tuple. +\indexii{variable length}{parameter list} + +\end{itemize} + +Note that the `variable length parameter list' feature only works at +the top level of the parameter list; individual parameters use a model +corresponding more closely to that of ordinary assignment. While the +latter model is generally preferable, because of the greater type +safety it offers (wrong-sized tuples aren't silently mistreated), +variable length parameter lists are a sufficiently accepted practice +in most programming languages that a compromise has been worked out. +(And anyway, assignment has no equivalent for empty argument lists.) + +It is also possible to create anonymous functions (functions not bound +to a name), for immediate use in expressions. This uses lambda forms, +described in section \ref{lambda}. +\indexii{lambda}{form} + +\section{Class definitions} \label{class} +\indexii{class}{definition} + +A class definition defines a class object (see section \ref{types}): +\obindex{class} + +\begin{verbatim} +classdef: "class" classname [inheritance] ":" suite +inheritance: "(" [condition_list] ")" +classname: identifier +\end{verbatim} + +A class definition is an executable statement. It first evaluates the +inheritance list, if present. Each item in the inheritance list +should evaluate to a class object. The class's suite is then executed +in a new execution frame (see section \ref{execframes}), using a newly +created local name space and the original global name space. +(Usually, the suite contains only function definitions.)
When the +class's suite finishes execution, its execution frame is discarded but +its local name space is saved. A class object is then created using +the inheritance list for the base classes and the saved local name +space for the attribute dictionary. The class name is bound to this +class object in the original local name space. +\index{inheritance} +\indexii{class}{name} +\indexii{name}{binding} +\indexii{execution}{frame} diff --git a/Doc/ref/ref8.tex b/Doc/ref/ref8.tex new file mode 100644 index 0000000..a678f9f --- /dev/null +++ b/Doc/ref/ref8.tex @@ -0,0 +1,105 @@ +\chapter{Top-level components} + +The Python interpreter can get its input from a number of sources: +from a script passed to it as standard input or as program argument, +typed in interactively, from a module source file, etc. This chapter +gives the syntax used in these cases. +\index{interpreter} + +\section{Complete Python programs} +\index{program} + +While a language specification need not prescribe how the language +interpreter is invoked, it is useful to have a notion of a complete +Python program. A complete Python program is executed in a minimally +initialized environment: all built-in and standard modules are +available, but none have been initialized, except for \verb@sys@ +(various system services), \verb@__builtin__@ (built-in functions, +exceptions and \verb@None@) and \verb@__main__@. The latter is used +to provide the local and global name space for execution of the +complete program. +\refbimodindex{sys} +\refbimodindex{__main__} +\refbimodindex{__builtin__} + +The syntax for a complete Python program is that for file input, +described in the next section. + +The interpreter may also be invoked in interactive mode; in this case, +it does not read and execute a complete program but reads and executes +one statement (possibly compound) at a time. The initial environment +is identical to that of a complete program; each statement is executed +in the name space of \verb@__main__@. 
+\index{interactive mode} +\refbimodindex{__main__} + +Under {\UNIX}, a complete program can be passed to the interpreter in +three forms: with the {\bf -c} {\it string} command line option, as a +file passed as the first command line argument, or as standard input. +If the file or standard input is a tty device, the interpreter enters +interactive mode; otherwise, it executes the file as a complete +program. +\index{UNIX} +\index{command line} +\index{standard input} + +\section{File input} + +All input read from non-interactive files has the same form: + +\begin{verbatim} +file_input: (NEWLINE | statement)* +\end{verbatim} + +This syntax is used in the following situations: + +\begin{itemize} + +\item when parsing a complete Python program (from a file or from a string); + +\item when parsing a module; + +\item when parsing a string passed to the \verb@exec@ statement; + +\end{itemize} + +\section{Interactive input} + +Input in interactive mode is parsed using the following grammar: + +\begin{verbatim} +interactive_input: [stmt_list] NEWLINE | compound_stmt NEWLINE +\end{verbatim} + +Note that a (top-level) compound statement must be followed by a blank +line in interactive mode; this is needed to help the parser detect the +end of the input. + +\section{Expression input} +\index{input} + +There are two forms of expression input. Both ignore leading +whitespace. + +The string argument to \verb@eval()@ must have the following form: +\bifuncindex{eval} + +\begin{verbatim} +eval_input: condition_list NEWLINE* +\end{verbatim} + +The input line read by \verb@input()@ must have the following form: +\bifuncindex{input} + +\begin{verbatim} +input_input: condition_list NEWLINE +\end{verbatim} + +Note: to read `raw' input line without interpretation, you can use the +built-in function \verb@raw_input()@ or the \verb@readline()@ method +of file objects. 
+\obindex{file} +\index{input!raw} +\index{raw input} +\bifuncindex{raw_input} +\ttindex{readline} diff --git a/Doc/ref1.tex b/Doc/ref1.tex new file mode 100644 index 0000000..30bfcce --- /dev/null +++ b/Doc/ref1.tex @@ -0,0 +1,81 @@ +\chapter{Introduction} + +This reference manual describes the Python programming language. +It is not intended as a tutorial. + +While I am trying to be as precise as possible, I chose to use English +rather than formal specifications for everything except syntax and +lexical analysis. This should make the document more understandable +to the average reader, but will leave room for ambiguities. +Consequently, if you were coming from Mars and tried to re-implement +Python from this document alone, you might have to guess things and in +fact you would probably end up implementing quite a different language. +On the other hand, if you are using +Python and wonder what the precise rules about a particular area of +the language are, you should definitely be able to find them here. + +It is dangerous to add too many implementation details to a language +reference document --- the implementation may change, and other +implementations of the same language may work differently. On the +other hand, there is currently only one Python implementation, and +its particular quirks are sometimes worth being mentioned, especially +where the implementation imposes additional limitations. Therefore, +you'll find short ``implementation notes'' sprinkled throughout the +text. + +Every Python implementation comes with a number of built-in and +standard modules. These are not documented here, but in the separate +{\em Python Library Reference} document. A few built-in modules are +mentioned when they interact in a significant way with the language +definition. + +\section{Notation} + +The descriptions of lexical analysis and syntax use a modified BNF +grammar notation. 
This uses the following style of definition: +\index{BNF} +\index{grammar} +\index{syntax} +\index{notation} + +\begin{verbatim} +name: lc_letter (lc_letter | "_")* +lc_letter: "a"..."z" +\end{verbatim} + +The first line says that a \verb@name@ is an \verb@lc_letter@ followed by +a sequence of zero or more \verb@lc_letter@s and underscores. An +\verb@lc_letter@ in turn is any of the single characters `a' through `z'. +(This rule is actually adhered to for the names defined in lexical and +grammar rules in this document.) + +Each rule begins with a name (which is the name defined by the rule) +and a colon. A vertical bar (\verb@|@) is used to separate +alternatives; it is the least binding operator in this notation. A +star (\verb@*@) means zero or more repetitions of the preceding item; +likewise, a plus (\verb@+@) means one or more repetitions, and a +phrase enclosed in square brackets (\verb@[ ]@) means zero or one +occurrences (in other words, the enclosed phrase is optional). The +\verb@*@ and \verb@+@ operators bind as tightly as possible; +parentheses are used for grouping. Literal strings are enclosed in +quotes. White space is only meaningful to separate tokens. +Rules are normally contained on a single line; rules with many +alternatives may be formatted alternatively with each line after the +first beginning with a vertical bar. + +In lexical definitions (as the example above), two more conventions +are used: Two literal characters separated by three dots mean a choice +of any single character in the given (inclusive) range of \ASCII{} +characters. A phrase between angular brackets (\verb@<...>@) gives an +informal description of the symbol defined; e.g. this could be used +to describe the notion of `control character' if needed. 
+\index{lexical definitions} +\index{ASCII} + +Even though the notation used is almost the same, there is a big +difference between the meaning of lexical and syntactic definitions: +a lexical definition operates on the individual characters of the +input source, while a syntax definition operates on the stream of +tokens generated by the lexical analysis. All uses of BNF in the next +chapter (``Lexical Analysis'') are lexical definitions; uses in +subsequent chapters are syntactic definitions. diff --git a/Doc/ref2.tex b/Doc/ref2.tex new file mode 100644 index 0000000..b093998 --- /dev/null +++ b/Doc/ref2.tex @@ -0,0 +1,372 @@ +\chapter{Lexical analysis} + +A Python program is read by a {\em parser}. Input to the parser is a +stream of {\em tokens}, generated by the {\em lexical analyzer}. This +chapter describes how the lexical analyzer breaks a file into tokens. +\index{lexical analysis} +\index{parser} +\index{token} + +\section{Line structure} + +A Python program is divided in a number of logical lines. The end of +a logical line is represented by the token NEWLINE. Statements cannot +cross logical line boundaries except where NEWLINE is allowed by the +syntax (e.g. between statements in compound statements). +\index{line structure} +\index{logical line} +\index{NEWLINE token} + +\subsection{Comments} + +A comment starts with a hash character (\verb@#@) that is not part of +a string literal, and ends at the end of the physical line. A comment +always signifies the end of the logical line. Comments are ignored by +the syntax. 
+\index{comment} +\index{logical line} +\index{physical line} +\index{hash character} + +\subsection{Explicit line joining} + +Two or more physical lines may be joined into logical lines using +backslash characters (\verb/\/), as follows: when a physical line ends +in a backslash that is not part of a string literal or comment, it is +joined with the following forming a single logical line, deleting the +backslash and the following end-of-line character. For example: +\index{physical line} +\index{line joining} +\index{line continuation} +\index{backslash character} +% +\begin{verbatim} +if 1900 < year < 2100 and 1 <= month <= 12 \ + and 1 <= day <= 31 and 0 <= hour < 24 \ + and 0 <= minute < 60 and 0 <= second < 60: # Looks like a valid date + return 1 +\end{verbatim} + +A line ending in a backslash cannot carry a comment; a backslash does +not continue a comment (but it does continue a string literal, see +below). + +\subsection{Implicit line joining} + +Expressions in parentheses, square brackets or curly braces can be +split over more than one physical line without using backslashes. +For example: + +\begin{verbatim} +month_names = ['Januari', 'Februari', 'Maart', # These are the + 'April', 'Mei', 'Juni', # Dutch names + 'Juli', 'Augustus', 'September', # for the months + 'Oktober', 'November', 'December'] # of the year +\end{verbatim} + +Implicitly continued lines can carry comments. The indentation of the +continuation lines is not important. Blank continuation lines are +allowed. + +\subsection{Blank lines} + +A logical line that contains only spaces, tabs, and possibly a +comment, is ignored (i.e., no NEWLINE token is generated), except that +during interactive input of statements, an entirely blank logical line +terminates a multi-line statement. 
+\index{blank line} + +\subsection{Indentation} + +Leading whitespace (spaces and tabs) at the beginning of a logical +line is used to compute the indentation level of the line, which in +turn is used to determine the grouping of statements. +\index{indentation} +\index{whitespace} +\index{leading whitespace} +\index{space} +\index{tab} +\index{grouping} +\index{statement grouping} + +First, tabs are replaced (from left to right) by one to eight spaces +such that the total number of characters up to there is a multiple of +eight (this is intended to be the same rule as used by {\UNIX}). The +total number of spaces preceding the first non-blank character then +determines the line's indentation. Indentation cannot be split over +multiple physical lines using backslashes. + +The indentation levels of consecutive lines are used to generate +INDENT and DEDENT tokens, using a stack, as follows. +\index{INDENT token} +\index{DEDENT token} + +Before the first line of the file is read, a single zero is pushed on +the stack; this will never be popped off again. The numbers pushed on +the stack will always be strictly increasing from bottom to top. At +the beginning of each logical line, the line's indentation level is +compared to the top of the stack. If it is equal, nothing happens. +If it is larger, it is pushed on the stack, and one INDENT token is +generated. If it is smaller, it {\em must} be one of the numbers +occurring on the stack; all numbers on the stack that are larger are +popped off, and for each number popped off a DEDENT token is +generated. At the end of the file, a DEDENT token is generated for +each number remaining on the stack that is larger than zero. 
+ +Here is an example of a correctly (though confusingly) indented piece +of Python code: + +\begin{verbatim} +def perm(l): + # Compute the list of all permutations of l + + if len(l) <= 1: + return [l] + r = [] + for i in range(len(l)): + s = l[:i] + l[i+1:] + p = perm(s) + for x in p: + r.append(l[i:i+1] + x) + return r +\end{verbatim} + +The following example shows various indentation errors: + +\begin{verbatim} + def perm(l): # error: first line indented + for i in range(len(l)): # error: not indented + s = l[:i] + l[i+1:] + p = perm(l[:i] + l[i+1:]) # error: unexpected indent + for x in p: + r.append(l[i:i+1] + x) + return r # error: inconsistent dedent +\end{verbatim} + +(Actually, the first three errors are detected by the parser; only the +last error is found by the lexical analyzer --- the indentation of +\verb@return r@ does not match a level popped off the stack.) + +\section{Other tokens} + +Besides NEWLINE, INDENT and DEDENT, the following categories of tokens +exist: identifiers, keywords, literals, operators, and delimiters. +Spaces and tabs are not tokens, but serve to delimit tokens. Where +ambiguity exists, a token comprises the longest possible string that +forms a legal token, when read from left to right. + +\section{Identifiers} + +Identifiers (also referred to as names) are described by the following +lexical definitions: +\index{identifier} +\index{name} + +\begin{verbatim} +identifier: (letter|"_") (letter|digit|"_")* +letter: lowercase | uppercase +lowercase: "a"..."z" +uppercase: "A"..."Z" +digit: "0"..."9" +\end{verbatim} + +Identifiers are unlimited in length. Case is significant. + +\subsection{Keywords} + +The following identifiers are used as reserved words, or {\em +keywords} of the language, and cannot be used as ordinary +identifiers. 
They must be spelled exactly as written here: +\index{keyword} +\index{reserved word} + +\begin{verbatim} +and elif global not try +break else if or while +class except import pass +continue finally in print +def for is raise +del from lambda return +\end{verbatim} + +% When adding keywords, pipe it through keywords.py for reformatting + +\section{Literals} \label{literals} + +Literals are notations for constant values of some built-in types. +\index{literal} +\index{constant} + +\subsection{String literals} + +String literals are described by the following lexical definitions: +\index{string literal} + +\begin{verbatim} +stringliteral: shortstring | longstring +shortstring: "'" shortstringitem* "'" | '"' shortstringitem* '"' +longstring: "'''" longstringitem* "'''" | '"""' longstringitem* '"""' +shortstringitem: shortstringchar | escapeseq +longstringitem: longstringchar | escapeseq +shortstringchar: <any ASCII character except "\" or newline or the quote> +longstringchar: <any ASCII character except "\"> +escapeseq: "\" <any ASCII character> +\end{verbatim} +\index{ASCII} + +In ``long strings'' (strings surrounded by sets of three quotes), +unescaped newlines and quotes are allowed (and are retained), except +that three unescaped quotes in a row terminate the string. (A +``quote'' is the character used to open the string, i.e. either +\verb/'/ or \verb/"/.) + +Escape sequences in strings are interpreted according to rules similar +to those used by Standard C. 
The recognized escape sequences are: +\index{physical line} +\index{escape sequence} +\index{Standard C} +\index{C} + +\begin{center} +\begin{tabular}{|l|l|} +\hline +\verb/\/{\em newline} & Ignored \\ +\verb/\\/ & Backslash (\verb/\/) \\ +\verb/\'/ & Single quote (\verb/'/) \\ +\verb/\"/ & Double quote (\verb/"/) \\ +\verb/\a/ & \ASCII{} Bell (BEL) \\ +\verb/\b/ & \ASCII{} Backspace (BS) \\ +%\verb/\E/ & \ASCII{} Escape (ESC) \\ +\verb/\f/ & \ASCII{} Formfeed (FF) \\ +\verb/\n/ & \ASCII{} Linefeed (LF) \\ +\verb/\r/ & \ASCII{} Carriage Return (CR) \\ +\verb/\t/ & \ASCII{} Horizontal Tab (TAB) \\ +\verb/\v/ & \ASCII{} Vertical Tab (VT) \\ +\verb/\/{\em ooo} & \ASCII{} character with octal value {\em ooo} \\ +\verb/\x/{\em xx...} & \ASCII{} character with hex value {\em xx...} \\ +\hline +\end{tabular} +\end{center} +\index{ASCII} + +In strict compatibility with Standard C, up to three octal digits are +accepted, but an unlimited number of hex digits is taken to be part of +the hex escape (and then the lower 8 bits of the resulting hex number +are used in all current implementations...). + +All unrecognized escape sequences are left in the string unchanged, +i.e., {\em the backslash is left in the string.} (This behavior is +useful when debugging: if an escape sequence is mistyped, the +resulting output is more easily recognized as broken. It also helps a +great deal for string literals used as regular expressions or +otherwise passed to other modules that do their own escape handling.) +\index{unrecognized escape sequence} + +\subsection{Numeric literals} + +There are three types of numeric literals: plain integers, long +integers, and floating point numbers. 
+\index{number} +\index{numeric literal} +\index{integer literal} +\index{plain integer literal} +\index{long integer literal} +\index{floating point literal} +\index{hexadecimal literal} +\index{octal literal} +\index{decimal literal} + +Integer and long integer literals are described by the following +lexical definitions: + +\begin{verbatim} +longinteger: integer ("l"|"L") +integer: decimalinteger | octinteger | hexinteger +decimalinteger: nonzerodigit digit* | "0" +octinteger: "0" octdigit+ +hexinteger: "0" ("x"|"X") hexdigit+ + +nonzerodigit: "1"..."9" +octdigit: "0"..."7" +hexdigit: digit|"a"..."f"|"A"..."F" +\end{verbatim} + +Although both lower case `l' and upper case `L' are allowed as suffix +for long integers, it is strongly recommended to always use `L', since +the letter `l' looks too much like the digit `1'. + +Plain integer decimal literals must be at most 2147483647 (i.e., the +largest positive integer, using 32-bit arithmetic). Plain octal and +hexadecimal literals may be as large as 4294967295, but values larger +than 2147483647 are converted to a negative value by subtracting +4294967296. There is no limit for long integer literals apart from +what can be stored in available memory. + +Some examples of plain and long integer literals: + +\begin{verbatim} +7 2147483647 0177 0x80000000 +3L 79228162514264337593543950336L 0377L 0x100000000L +\end{verbatim} + +Floating point literals are described by the following lexical +definitions: + +\begin{verbatim} +floatnumber: pointfloat | exponentfloat +pointfloat: [intpart] fraction | intpart "." +exponentfloat: (intpart | pointfloat) exponent +intpart: digit+ +fraction: "." digit+ +exponent: ("e"|"E") ["+"|"-"] digit+ +\end{verbatim} + +The allowed range of floating point literals is +implementation-dependent. + +Some examples of floating point literals: + +\begin{verbatim} +3.14 10. 
.001 1e100 3.14e-10 +\end{verbatim} + +Note that numeric literals do not include a sign; a phrase like +\verb@-1@ is actually an expression composed of the operator +\verb@-@ and the literal \verb@1@. + +\section{Operators} + +The following tokens are operators: +\index{operators} + +\begin{verbatim} ++ - * / % +<< >> & | ^ ~ +< == > <= <> != >= +\end{verbatim} + +The comparison operators \verb@<>@ and \verb@!=@ are alternate +spellings of the same operator. + +\section{Delimiters} + +The following tokens serve as delimiters or otherwise have a special +meaning: +\index{delimiters} + +\begin{verbatim} +( ) [ ] { } +, : . " ` ' += ; +\end{verbatim} + +The following printing \ASCII{} characters are not used in Python. Their +occurrence outside string literals and comments is an unconditional +error: +\index{ASCII} + +\begin{verbatim} +@ $ ? +\end{verbatim} + +They may be used by future versions of the language though! diff --git a/Doc/ref3.tex b/Doc/ref3.tex new file mode 100644 index 0000000..fd152c1 --- /dev/null +++ b/Doc/ref3.tex @@ -0,0 +1,889 @@ +\chapter{Data model} + +\section{Objects, values and types} + +\dfn{Objects} are Python's abstraction for data. All data in a Python +program is represented by objects or by relations between objects. +(In a sense, and in conformance to Von Neumann's model of a +``stored program computer'', code is also represented by objects.) +\index{object} +\index{data} + +Every object has an identity, a type and a value. An object's +\emph{identity} never changes once it has been created; you may think +of it as the object's address in memory. An object's \dfn{type} is +also unchangeable. It determines the operations that an object +supports (e.g.\ ``does it have a length?'') and also defines the +possible values for objects of that type. The \emph{value} of some +objects can change. 
Objects whose value can change are said to be +\emph{mutable}; objects whose value is unchangeable once they are +created are called \emph{immutable}. The type determines an object's +(im)mutability. +\index{identity of an object} +\index{value of an object} +\index{type of an object} +\index{mutable object} +\index{immutable object} + +Objects are never explicitly destroyed; however, when they become +unreachable they may be garbage-collected. An implementation is +allowed to delay garbage collection or omit it altogether --- it is a +matter of implementation quality how garbage collection is +implemented, as long as no objects are collected that are still +reachable. (Implementation note: the current implementation uses a +reference-counting scheme which collects most objects as soon as they +become unreachable, but never collects garbage containing circular +references.) +\index{garbage collection} +\index{reference counting} +\index{unreachable object} + +Note that the use of the implementation's tracing or debugging +facilities may keep objects alive that would normally be collectable. + +Some objects contain references to ``external'' resources such as open +files or windows. It is understood that these resources are freed +when the object is garbage-collected, but since garbage collection is +not guaranteed to happen, such objects also provide an explicit way to +release the external resource, usually a \method{close()} method. +Programs are strongly recommended to always explicitly close such +objects. + +Some objects contain references to other objects; these are called +\emph{containers}. Examples of containers are tuples, lists and +dictionaries. The references are part of a container's value. In +most cases, when we talk about the value of a container, we imply the +values, not the identities of the contained objects; however, when we +talk about the (im)mutability of a container, only the identities of +the immediately contained objects are implied. 
(So, if an immutable +container contains a reference to a mutable object, its value changes +if that mutable object is changed.) +\index{container} + +Types affect almost all aspects of objects' lives. Even the meaning +of object identity is affected in some sense: for immutable types, +operations that compute new values may actually return a reference to +any existing object with the same type and value, while for mutable +objects this is not allowed. E.g. after + +\begin{verbatim} +a = 1; b = 1; c = []; d = [] +\end{verbatim} + +\code{a} and \code{b} may or may not refer to the same object with the +value one, depending on the implementation, but \code{c} and \code{d} +are guaranteed to refer to two different, unique, newly created empty +lists. + +\section{The standard type hierarchy} \label{types} + +Below is a list of the types that are built into Python. Extension +modules written in C can define additional types. Future versions of +Python may add types to the type hierarchy (e.g.\ rational or complex +numbers, efficiently stored arrays of integers, etc.). +\index{type} +\indexii{data}{type} +\indexii{type}{hierarchy} +\indexii{extension}{module} +\indexii{C}{language} + +Some of the type descriptions below contain a paragraph listing +`special attributes'. These are attributes that provide access to the +implementation and are not intended for general use. Their definition +may change in the future. There are also some `generic' special +attributes, not listed with the individual objects: \member{__methods__} +is a list of the method names of a built-in object, if it has any; +\member{__members__} is a list of the data attribute names of a built-in +object, if it has any. +\index{attribute} +\indexii{special}{attribute} +\indexiii{generic}{special}{attribute} +\ttindex{__methods__} +\ttindex{__members__} + +\begin{description} + +\item[None] +This type has a single value. There is a single object with this value. 
+This object is accessed through the built-in name \code{None}. +It is returned from functions that don't explicitly return an object. +\ttindex{None} +\obindex{None@{\tt None}} + +\item[Numbers] +These are created by numeric literals and returned as results by +arithmetic operators and arithmetic built-in functions. Numeric +objects are immutable; once created their value never changes. Python +numbers are of course strongly related to mathematical numbers, but +subject to the limitations of numerical representation in computers. +\obindex{number} +\obindex{numeric} + +Python distinguishes between integers and floating point numbers: + +\begin{description} +\item[Integers] +These represent elements from the mathematical set of whole numbers. +\obindex{integer} + +There are two types of integers: + +\begin{description} + +\item[Plain integers] +These represent numbers in the range -2147483648 through 2147483647. +(The range may be larger on machines with a larger natural word +size, but not smaller.) +When the result of an operation falls outside this range, the +exception \exception{OverflowError} is raised. +For the purpose of shift and mask operations, integers are assumed to +have a binary, 2's complement notation using 32 or more bits, and +hiding no bits from the user (i.e., all 4294967296 different bit +patterns correspond to different values). +\obindex{plain integer} +\withsubitem{(built-in exception)}{\ttindex{OverflowError}} + +\item[Long integers] +These represent numbers in an unlimited range, subject to available +(virtual) memory only. For the purpose of shift and mask operations, +a binary representation is assumed, and negative numbers are +represented in a variant of 2's complement which gives the illusion of +an infinite string of sign bits extending to the left. 
+\obindex{long integer} + +\end{description} % Integers + +The rules for integer representation are intended to give the most +meaningful interpretation of shift and mask operations involving +negative integers and the least surprises when switching between the +plain and long integer domains. For any operation except left shift, +if it yields a result in the plain integer domain without causing +overflow, it will yield the same result in the long integer domain or +when using mixed operands. +\indexii{integer}{representation} + +\item[Floating point numbers] +These represent machine-level double precision floating point numbers. +You are at the mercy of the underlying machine architecture and +C implementation for the accepted range and handling of overflow. +\obindex{floating point} +\indexii{floating point}{number} +\indexii{C}{language} + +\end{description} % Numbers + +\item[Sequences] +These represent finite ordered sets indexed by natural numbers. +The built-in function \function{len()}\bifuncindex{len} returns the +number of elements of a sequence. When this number is \var{n}, the +index set contains the numbers 0, 1, \ldots, \var{n}-1. Element +\var{i} of sequence \var{a} is selected by \code{\var{a}[\var{i}]}. +\obindex{sequence} +\index{index operation} +\index{item selection} +\index{subscription} + +Sequences also support slicing: \code{\var{a}[\var{i}:\var{j}]} +selects all elements with index \var{k} such that \var{i} \code{<=} +\var{k} \code{<} \var{j}. When used as an expression, a slice is a +sequence of the same type --- this implies that the index set is +renumbered so that it starts at 0 again. +\index{slicing} + +Sequences are distinguished according to their mutability: + +\begin{description} +% +\item[Immutable sequences] +An object of an immutable sequence type cannot change once it is +created. 
(If the object contains references to other objects, +these other objects may be mutable and may be changed; however +the collection of objects directly referenced by an immutable object +cannot change.) +\obindex{immutable sequence} +\obindex{immutable} + +The following types are immutable sequences: + +\begin{description} + +\item[Strings] +The elements of a string are characters. There is no separate +character type; a character is represented by a string of one element. +Characters represent (at least) 8-bit bytes. The built-in +functions \function{chr()}\bifuncindex{chr} and +\function{ord()}\bifuncindex{ord} convert between characters and +nonnegative integers representing the byte values. Bytes with the +values 0-127 represent the corresponding \ASCII{} values. The string +data type is also used to represent arrays of bytes, e.g.\ to hold data +read from a file. +\obindex{string} +\index{character} +\index{byte} +\index{ASCII} + +(On systems whose native character set is not \ASCII{}, strings may use +EBCDIC in their internal representation, provided the functions +\function{chr()} and \function{ord()} implement a mapping between \ASCII{} and +EBCDIC, and string comparison preserves the \ASCII{} order. +Or perhaps someone can propose a better rule?) +\index{ASCII} +\index{EBCDIC} +\index{character set} +\indexii{string}{comparison} +\bifuncindex{chr} +\bifuncindex{ord} + +\item[Tuples] +The elements of a tuple are arbitrary Python objects. +Tuples of two or more elements are formed by comma-separated lists +of expressions. A tuple of one element (a `singleton') can be formed +by affixing a comma to an expression (an expression by itself does +not create a tuple, since parentheses must be usable for grouping of +expressions). An empty tuple can be formed by enclosing `nothing' in +parentheses. 
+\obindex{tuple} +\indexii{singleton}{tuple} +\indexii{empty}{tuple} + +\end{description} % Immutable sequences + +\item[Mutable sequences] +Mutable sequences can be changed after they are created. The +subscription and slicing notations can be used as the target of +assignment and \keyword{del} (delete) statements. +\obindex{mutable sequence} +\obindex{mutable} +\indexii{assignment}{statement} +\index{delete} +\stindex{del} +\index{subscription} +\index{slicing} + +There is currently a single mutable sequence type: + +\begin{description} + +\item[Lists] +The elements of a list are arbitrary Python objects. Lists are formed +by placing a comma-separated list of expressions in square brackets. +(Note that there are no special cases needed to form lists of length 0 +or 1.) +\obindex{list} + +\end{description} % Mutable sequences + +\end{description} % Sequences + +\item[Mapping types] +These represent finite sets of objects indexed by arbitrary index sets. +The subscript notation \code{a[k]} selects the element indexed +by \code{k} from the mapping \code{a}; this can be used in +expressions and as the target of assignments or \keyword{del} statements. +The built-in function \function{len()} returns the number of elements +in a mapping. +\bifuncindex{len} +\index{subscription} +\obindex{mapping} + +There is currently a single mapping type: + +\begin{description} + +\item[Dictionaries] +These represent finite sets of objects indexed by almost arbitrary +values. The only types of values not acceptable as keys are values +containing lists or dictionaries or other mutable types that are +compared by value rather than by object identity --- the reason being +that the implementation requires that a key's hash value be constant. +Numeric types used for keys obey the normal rules for numeric +comparison: if two numbers compare equal (e.g.\ \code{1} and +\code{1.0}) then they can be used interchangeably to index the same +dictionary entry. 
+ +Dictionaries are mutable; they are created by the \code{\{...\}} +notation (see section \ref{dict}). +\obindex{dictionary} +\obindex{mutable} + +\end{description} % Mapping types + +\item[Callable types] +These are the types to which the function call (invocation) operation, +written as \code{function(argument, argument, ...)}, can be applied: +\indexii{function}{call} +\index{invocation} +\indexii{function}{argument} +\obindex{callable} + +\begin{description} + +\item[User-defined functions] +A user-defined function object is created by a function definition +(see section \ref{function}). It should be called with an argument +list containing the same number of items as the function's formal +parameter list. +\indexii{user-defined}{function} +\obindex{function} +\obindex{user-defined function} + +Special read-only attributes: \member{func_code} is the code object +representing the compiled function body, and \member{func_globals} is (a +reference to) the dictionary that holds the function's global +variables --- it implements the global name space of the module in +which the function was defined. +\ttindex{func_code} +\ttindex{func_globals} +\indexii{global}{name space} + +\item[User-defined methods] +A user-defined method (a.k.a. \dfn{object closure}) is a pair of a +class instance object and a user-defined function. It should be +called with an argument list containing one item less than the number +of items in the function's formal parameter list. When called, the +class instance becomes the first argument, and the call arguments are +shifted one to the right. +\obindex{method} +\obindex{user-defined method} +\indexii{user-defined}{method} +\index{object closure} + +Special read-only attributes: \member{im_self} is the class instance +object, \member{im_func} is the function object. +\ttindex{im_func} +\ttindex{im_self} + +\item[Built-in functions] +A built-in function object is a wrapper around a C function. 
Examples +of built-in functions are \function{len()} and \function{math.sin()}. There +are no special attributes. The number and type of the arguments are +determined by the C function. +\obindex{built-in function} +\obindex{function} +\indexii{C}{language} + +\item[Built-in methods] +This is really a different disguise of a built-in function, this time +containing an object passed to the \C{} function as an implicit extra +argument. An example of a built-in method is \code{\var{list}.append()} if +\var{list} is a list object. +\obindex{built-in method} +\obindex{method} +\indexii{built-in}{method} + +\item[Classes] +Class objects are described below. When a class object is called as a +function, a new class instance (also described below) is created and +returned. This implies a call to the class's \method{__init__()} method +if it has one. Any arguments are passed on to the \method{__init__()} +method --- if there is no \method{__init__()} method, the class must be called +without arguments. +\ttindex{__init__} +\obindex{class} +\obindex{class instance} +\obindex{instance} +\indexii{class object}{call} + +\end{description} + +\item[Modules] +Modules are imported by the \keyword{import} statement (see section +\ref{import}). A module object is a container for a module's name +space, which is a dictionary (the same dictionary as referenced by the +\member{func_globals} attribute of functions defined in the module). +Module attribute references are translated to lookups in this +dictionary. A module object does not contain the code object used to +initialize the module (since it isn't needed once the initialization +is done). +\stindex{import} +\obindex{module} + +Attribute assignments update the module's name space dictionary. + +Special read-only attribute: \member{__dict__} yields the module's name +space as a dictionary object.
Predefined attributes: \member{__name__} +yields the module's name as a string object; \member{__doc__} yields the +module's documentation string as a string object, or +\code{None} if no documentation string was found. +\ttindex{__dict__} +\ttindex{__name__} +\ttindex{__doc__} +\indexii{module}{name space} + +\item[Classes] +Class objects are created by class definitions (see section +\ref{class}). A class is a container for a dictionary containing the +class's name space. Class attribute references are translated to +lookups in this dictionary. When an attribute name is not found +there, the attribute search continues in the base classes. The search +is depth-first, left-to-right in the order of their occurrence in the +base class list. +\obindex{class} +\obindex{class instance} +\obindex{instance} +\indexii{class object}{call} +\index{container} +\obindex{dictionary} +\indexii{class}{attribute} + +Class attribute assignments update the class's dictionary, never the +dictionary of a base class. +\indexiii{class}{attribute}{assignment} + +A class can be called as a function to yield a class instance (see +above). +\indexii{class object}{call} + +Special read-only attributes: \member{__dict__} yields the dictionary +containing the class's name space; \member{__bases__} yields a tuple +(possibly empty or a singleton) containing the base classes, in the +order of their occurrence in the base class list. +\ttindex{__dict__} +\ttindex{__bases__} + +\item[Class instances] +A class instance is created by calling a class object as a +function. A class instance has a dictionary in which +attribute references are searched. When an attribute is not found +there, and the instance's class has an attribute by that name, and +that class attribute is a user-defined function (and in no other +cases), the instance attribute reference yields a user-defined method +object (see above) constructed from the instance and the function. 
+\obindex{class instance} +\obindex{instance} +\indexii{class}{instance} +\indexii{class instance}{attribute} + +Attribute assignments update the instance's dictionary. +\indexiii{class instance}{attribute}{assignment} + +Class instances can pretend to be numbers, sequences, or mappings if +they have methods with certain special names. These are described in +section \ref{specialnames}. +\obindex{number} +\obindex{sequence} +\obindex{mapping} + +Special read-only attributes: \member{__dict__} yields the attribute +dictionary; \member{__class__} yields the instance's class. +\ttindex{__dict__} +\ttindex{__class__} + +\item[Files] +A file object represents an open file. (It is a wrapper around a \C{} +\code{stdio} file pointer.) File objects are created by the +\function{open()} built-in function, and also by \function{posix.popen()} and +the \method{makefile()} method of socket objects. \code{sys.stdin}, +\code{sys.stdout} and \code{sys.stderr} are file objects corresponding +to the interpreter's standard input, output and error streams. +See the \emph{Python Library Reference} for methods of file objects +and other details. +\obindex{file} +\indexii{C}{language} +\index{stdio} +\bifuncindex{open} +\bifuncindex{popen} +\bifuncindex{makefile} +\ttindex{stdin} +\ttindex{stdout} +\ttindex{stderr} +\ttindex{sys.stdin} +\ttindex{sys.stdout} +\ttindex{sys.stderr} + +\item[Internal types] +A few types used internally by the interpreter are exposed to the user. +Their definition may change with future versions of the interpreter, +but they are mentioned here for completeness. +\index{internal type} +\index{types, internal} + +\begin{description} + +\item[Code objects] +Code objects represent ``pseudo-compiled'' executable Python code. +The difference between a code +object and a function object is that the function object contains an +explicit reference to the function's context (the module in which it +was defined) while a code object contains no context. 
+\obindex{code} + +Special read-only attributes: \member{co_code} is a string representing +the sequence of instructions; \member{co_consts} is a list of literals +used by the code; \member{co_names} is a list of names (strings) used by +the code; \member{co_filename} is the filename from which the code was +compiled. (To find out the line numbers, you would have to decode the +instructions; the standard library module +\module{dis}\refstmodindex{dis} contains an example of how to do +this.) +\ttindex{co_code} +\ttindex{co_consts} +\ttindex{co_names} +\ttindex{co_filename} + +\item[Frame objects] +Frame objects represent execution frames. They may occur in traceback +objects (see below). +\obindex{frame} + +Special read-only attributes: \member{f_back} is the previous +stack frame (towards the caller), or \code{None} if this is the bottom +stack frame; \member{f_code} is the code object being executed in this +frame; \member{f_globals} is the dictionary used to look up global +variables; \member{f_locals} is used for local variables; +\member{f_lineno} gives the line number and \member{f_lasti} gives the +precise instruction (this is an index into the instruction string of +the code object). +\ttindex{f_back} +\ttindex{f_code} +\ttindex{f_globals} +\ttindex{f_locals} +\ttindex{f_lineno} +\ttindex{f_lasti} + +\item[Traceback objects] \label{traceback} +Traceback objects represent a stack trace of an exception. A +traceback object is created when an exception occurs. When the search +for an exception handler unwinds the execution stack, at each unwound +level a traceback object is inserted in front of the current +traceback. When an exception handler is entered +(see also section \ref{try}), the stack trace is +made available to the program as \code{sys.exc_traceback}.
When the +program contains no suitable handler, the stack trace is written +(nicely formatted) to the standard error stream; if the interpreter is +interactive, it is also made available to the user as +\code{sys.last_traceback}. +\obindex{traceback} +\indexii{stack}{trace} +\indexii{exception}{handler} +\indexii{execution}{stack} +\ttindex{exc_traceback} +\ttindex{last_traceback} +\ttindex{sys.exc_traceback} +\ttindex{sys.last_traceback} + +Special read-only attributes: \member{tb_next} is the next level in the +stack trace (towards the frame where the exception occurred), or +\code{None} if there is no next level; \member{tb_frame} points to the +execution frame of the current level; \member{tb_lineno} gives the line +number where the exception occurred; \member{tb_lasti} indicates the +precise instruction. The line number and last instruction in the +traceback may differ from the line number of its frame object if the +exception occurred in a \keyword{try} statement with no matching +except clause or with a finally clause. +\ttindex{tb_next} +\ttindex{tb_frame} +\ttindex{tb_lineno} +\ttindex{tb_lasti} +\stindex{try} + +\end{description} % Internal types + +\end{description} % Types + + +\section{Special method names} \label{specialnames} + +A class can implement certain operations that are invoked by special +syntax (such as subscription or arithmetic operations) by defining +methods with special names. For instance, if a class defines a +method named \method{__getitem__()}, and \code{x} is an instance of this +class, then \code{x[i]} is equivalent to \code{x.__getitem__(i)}. +(The reverse is not true --- if \code{x} is a list object, +\code{x.__getitem__(i)} is not equivalent to \code{x[i]}.) +\ttindex{__getitem__} + +Except for \method{__repr__()}, \method{__str__()} and \method{__cmp__()}, +attempts to execute an +operation raise an exception when no appropriate method is defined. 
+For \method{__repr__()}, the default is to return a string describing the +object's class and address. +For \method{__cmp__()}, the default is to compare instances based on their +address. +For \method{__str__()}, the default is to use \method{__repr__()}. +\ttindex{__repr__} +\ttindex{__str__} +\ttindex{__cmp__} + + +\subsection{Special methods for any type} + +\begin{description} + +\item[{\tt __init__(self, args...)}] +Called when the instance is created. The arguments are those passed +to the class constructor expression. If a base class has an +\code{__init__} method the derived class's \code{__init__} method must +explicitly call it to ensure proper initialization of the base class +part of the instance. +\ttindex{__init__} +\indexii{class}{constructor} + + +\item[{\tt __del__(self)}] +Called when the instance is about to be destroyed. If a base class +has a \method{__del__()} method the derived class's \method{__del__()} method +must explicitly call it to ensure proper deletion of the base class +part of the instance. Note that it is possible for the \method{__del__()} +method to postpone destruction of the instance by creating a new +reference to it. It may then be called at a later time when this new +reference is deleted. It is not guaranteed that +\method{__del__()} methods are called for objects that still exist when +the interpreter exits. +If an exception occurs in a \method{__del__()} method, it is ignored, and +a warning is printed on stderr. +\ttindex{__del__} +\stindex{del} + +Note that \code{del x} doesn't directly call \code{x.__del__()} --- the +former decrements the reference count for \code{x} by one, but +\code{x.__del__()} is only called when its reference count reaches zero. + +\strong{Warning:} due to the precarious circumstances under which +\code{__del__()} methods are executed, exceptions that occur during +their execution are \emph{ignored}. 
+ +\item[{\tt __repr__(self)}] +Called by the \function{repr()} built-in function and by string conversions +(reverse or backward quotes) to compute the string representation of an object. +\ttindex{__repr__} +\bifuncindex{repr} +\indexii{string}{conversion} +\indexii{reverse}{quotes} +\indexii{backward}{quotes} +\index{back-quotes} + +\item[{\tt __str__(self)}] +Called by the \function{str()} built-in function and by the \keyword{print} +statement to compute the string representation of an object. +\ttindex{__str__} +\bifuncindex{str} +\stindex{print} + +\item[{\tt __cmp__(self, other)}] +Called by all comparison operations. Should return \code{-1} if +\code{self < other}, \code{0} if \code{self == other}, \code{+1} if +\code{self > other}. If no \method{__cmp__()} operation is defined, class +instances are compared by object identity (``address''). +(Implementation note: due to limitations in the interpreter, +exceptions raised by comparisons are ignored, and the objects will be +considered equal in this case.) +\ttindex{__cmp__} +\bifuncindex{cmp} +\index{comparisons} + +\item[{\tt __hash__(self)}] +Called for the key object for dictionary operations, +and by the built-in function +\function{hash()}\bifuncindex{hash}. Should return a 32-bit integer +usable as a hash value +for dictionary operations. The only required property is that objects +which compare equal have the same hash value; it is advised to somehow +mix together (e.g.\ using exclusive or) the hash values for the +components of the object that also play a part in comparison of +objects. If a class does not define a \method{__cmp__()} method it should +not define a \method{__hash__()} operation either; if it defines +\method{__cmp__()} but not \method{__hash__()} its instances will not be +usable as dictionary keys.
If a class defines mutable objects and +implements a \method{__cmp__()} method it should not implement +\method{__hash__()}, since the dictionary implementation assumes that a +key's hash value is a constant. +\obindex{dictionary} +\ttindex{__cmp__} +\ttindex{__hash__} + +\item[{\tt __call__(self, *args)}] +Called when the instance is ``called'' as a function. +\ttindex{__call__} +\indexii{call}{instance} + +\end{description} + + +\subsection{Special methods for attribute access} + +The following methods can be used to change the meaning of attribute +access for class instances. + +\begin{description} + +\item[{\tt __getattr__(self, name)}] +Called when an attribute lookup has not found the attribute in the +usual places (i.e. it is not an instance attribute nor is it found in +the class tree for \code{self}). \code{name} is the attribute name. +\ttindex{__getattr__} + +Note that if the attribute is found through the normal mechanism, +\code{__getattr__} is not called. (This is an asymmetry between +\code{__getattr__} and \code{__setattr__}.) +This is done both for efficiency reasons and because otherwise +\code{__getattr__} would have no way to access other attributes of the +instance. +Note that at least for instance variables, \code{__getattr__} can fake +total control by simply not inserting any values in the instance +attribute dictionary. +\ttindex{__setattr__} + +\item[{\tt __setattr__(self, name, value)}] +Called when an attribute assignment is attempted. This is called +instead of the normal mechanism (i.e. store the value as an instance +attribute). \code{name} is the attribute name, \code{value} is the +value to be assigned to it. +\ttindex{__setattr__} + +If \code{__setattr__} wants to assign to an instance attribute, it +should not simply execute \code{self.\var{name} = value} --- this would +cause a recursive call. Instead, it should insert the value in the +dictionary of instance attributes, e.g.\ \code{self.__dict__[name] = +value}. 
+\ttindex{__dict__} + +\item[{\tt __delattr__(self, name)}] +Like \code{__setattr__} but for attribute deletion instead of +assignment. +\ttindex{__delattr__} + +\end{description} + + +\subsection{Special methods for sequence and mapping types} + +\begin{description} + +\item[{\tt __len__(self)}] +Called to implement the built-in function \function{len()}. Should return +the length of the object, an integer \code{>=} 0. Also, an object +whose \method{__len__()} method returns 0 is considered to be false in a +Boolean context. +\ttindex{__len__} + +\item[{\tt __getitem__(self, key)}] +Called to implement evaluation of \code{self[key]}. Note that the +special interpretation of negative keys (if the class wishes to +emulate a sequence type) is up to the \method{__getitem__()} method. +\ttindex{__getitem__} + +\item[{\tt __setitem__(self, key, value)}] +Called to implement assignment to \code{self[key]}. Same note as for +\method{__getitem__()}. +\ttindex{__setitem__} + +\item[{\tt __delitem__(self, key)}] +Called to implement deletion of \code{self[key]}. Same note as for +\method{__getitem__()}. +\ttindex{__delitem__} + +\end{description} + + +\subsection{Special methods for sequence types} + +\begin{description} + +\item[{\tt __getslice__(self, i, j)}] +Called to implement evaluation of \code{self[i:j]}. Note that missing +\code{i} or \code{j} are replaced by 0 or \code{len(self)}, +respectively, and \code{len(self)} has been added (once) to originally +negative \code{i} or \code{j} by the time this function is called +(unlike for \method{__getitem__()}). +\ttindex{__getslice__} + +\item[{\tt __setslice__(self, i, j, sequence)}] +Called to implement assignment to \code{self[i:j]}. Same notes as for +\method{__getslice__()}. +\ttindex{__setslice__} + +\item[{\tt __delslice__(self, i, j)}] +Called to implement deletion of \code{self[i:j]}. Same notes as for +\method{__getslice__()}. 
+\ttindex{__delslice__} + +\end{description} + + +\subsection{Special methods for numeric types} + +\begin{description} + +\item[{\tt __add__(self, other)}]\itemjoin +\item[{\tt __sub__(self, other)}]\itemjoin +\item[{\tt __mul__(self, other)}]\itemjoin +\item[{\tt __div__(self, other)}]\itemjoin +\item[{\tt __mod__(self, other)}]\itemjoin +\item[{\tt __divmod__(self, other)}]\itemjoin +\item[{\tt __pow__(self, other)}]\itemjoin +\item[{\tt __lshift__(self, other)}]\itemjoin +\item[{\tt __rshift__(self, other)}]\itemjoin +\item[{\tt __and__(self, other)}]\itemjoin +\item[{\tt __xor__(self, other)}]\itemjoin +\item[{\tt __or__(self, other)}]\itembreak +Called to implement the binary arithmetic operations (\code{+}, +\code{-}, \code{*}, \code{/}, \code{\%}, \function{divmod()}, \function{pow()}, +\code{<<}, \code{>>}, \code{\&}, \code{\^}, \code{|}). +\ttindex{__or__} +\ttindex{__xor__} +\ttindex{__and__} +\ttindex{__rshift__} +\ttindex{__lshift__} +\ttindex{__pow__} +\ttindex{__divmod__} +\ttindex{__mod__} +\ttindex{__div__} +\ttindex{__mul__} +\ttindex{__sub__} +\ttindex{__add__} + +\item[{\tt __neg__(self)}]\itemjoin +\item[{\tt __pos__(self)}]\itemjoin +\item[{\tt __abs__(self)}]\itemjoin +\item[{\tt __invert__(self)}]\itembreak +Called to implement the unary arithmetic operations (\code{-}, \code{+}, +\function{abs()} and \code{~}). +\ttindex{__invert__} +\ttindex{__abs__} +\ttindex{__pos__} +\ttindex{__neg__} + +\item[{\tt __nonzero__(self)}] +Called to implement boolean testing; should return 0 or 1. An +alternative name for this method is \method{__len__()}. +\ttindex{__nonzero__} + +\item[{\tt __coerce__(self, other)}] +Called to implement ``mixed-mode'' numeric arithmetic. Should either +return a tuple containing self and other converted to a common numeric +type, or None if no way of conversion is known. 
When the common type +would be the type of other, it is sufficient to return None, since the +interpreter will also ask the other object to attempt a coercion (but +sometimes, if the implementation of the other type cannot be changed, +it is useful to do the conversion to the other type here). +\ttindex{__coerce__} + +Note that this method is not called to coerce the arguments to \code{+} +and \code{*}, because these are also used to implement sequence +concatenation and repetition, respectively. Also note that, for the +same reason, in \code{\var{n} * \var{x}}, where \var{n} is a built-in +number and \var{x} is an instance, a call to +\code{\var{x}.__mul__(\var{n})} is made.% +\footnote{The interpreter should really distinguish between +user-defined classes implementing sequences, mappings or numbers, but +currently it doesn't --- hence this strange exception.} +\ttindex{__mul__} + +\item[{\tt __int__(self)}]\itemjoin +\item[{\tt __long__(self)}]\itemjoin +\item[{\tt __float__(self)}]\itembreak +Called to implement the built-in functions \function{int()}, \function{long()} +and \function{float()}. Should return a value of the appropriate type. +\ttindex{__float__} +\ttindex{__long__} +\ttindex{__int__} + +\item[{\tt __oct__(self)}]\itemjoin +\item[{\tt __hex__(self)}]\itembreak +Called to implement the built-in functions \function{oct()} and +\function{hex()}. Should return a string value. +\ttindex{__hex__} +\ttindex{__oct__} + +\end{description} diff --git a/Doc/ref4.tex b/Doc/ref4.tex new file mode 100644 index 0000000..9ab448b --- /dev/null +++ b/Doc/ref4.tex @@ -0,0 +1,200 @@ +\chapter{Execution model} +\index{execution model} + +\section{Code blocks, execution frames, and name spaces} \label{execframes} +\index{code block} +\indexii{execution}{frame} +\index{name space} + +A {\em code block} is a piece of Python program text that can be +executed as a unit, such as a module, a class definition or a function +body. 
Some code blocks (like modules) are executed only once, others +(like function bodies) may be executed many times. Code blocks may +textually contain other code blocks. Code blocks may invoke other +code blocks (that may or may not be textually contained in them) as +part of their execution, e.g. by invoking (calling) a function. +\index{code block} +\indexii{code}{block} + +The following are code blocks: A module is a code block. A function +body is a code block. A class definition is a code block. Each +command typed interactively is a separate code block; a script file is +a code block. The string argument passed to the built-in function +\function{eval()} and to the \keyword{exec} statement are code blocks. +And finally, the expression read and evaluated by the built-in +function \function{input()} is a code block. + +A code block is executed in an execution frame. An {\em execution +frame} contains some administrative information (used for debugging), +determines where and how execution continues after the code block's +execution has completed, and (perhaps most importantly) defines two +name spaces, the local and the global name space, that affect +execution of the code block. +\indexii{execution}{frame} + +A {\em name space} is a mapping from names (identifiers) to objects. +A particular name space may be referenced by more than one execution +frame, and from other places as well. Adding a name to a name space +is called {\em binding} a name (to an object); changing the mapping of +a name is called {\em rebinding}; removing a name is {\em unbinding}. +Name spaces are functionally equivalent to dictionaries. +\index{name space} +\indexii{binding}{name} +\indexii{rebinding}{name} +\indexii{unbinding}{name} + +The {\em local name space} of an execution frame determines the default +place where names are defined and searched. 
The {\em global name +space} determines the place where names listed in \keyword{global} +statements are defined and searched, and where names that are not +explicitly bound in the current code block are searched. +\indexii{local}{name space} +\indexii{global}{name space} +\stindex{global} + +Whether a name is local or global in a code block is determined by +static inspection of the source text for the code block: in the +absence of \keyword{global} statements, a name that is bound anywhere in +the code block is local in the entire code block; all other names are +considered global. The \keyword{global} statement forces global +interpretation of selected names throughout the code block. The +following constructs bind names: formal parameters, \keyword{import} +statements, class and function definitions (these bind the class or +function name), and targets that are identifiers if occurring in an +assignment, \keyword{for} loop header, or except clause header. + +A target occurring in a \keyword{del} statement is also considered bound +for this purpose (though the actual semantics are to ``unbind'' the +name). + +When a global name is not found in the global name space, it is +searched in the list of ``built-in'' names (which is actually the +global name space of the module \module{__builtin__}). When a name is not +found at all, the \exception{NameError} exception is raised.% +\footnote{If the code block contains \keyword{exec} statements or the +construct \samp{from \ldots import *}, the semantics of names not +explicitly mentioned in a {\tt global} statement change subtly: name +lookup first searches the local name space, then the global one, then +the built-in one.} +\refbimodindex{__builtin__} +\stindex{from} +\stindex{exec} +\stindex{global} +\withsubitem{(built-in exception)}{\ttindex{NameError}} + +The following table lists the meaning of the local and global name +space for various types of code blocks. 
The name space for a +particular module is automatically created when the module is first +referenced. Note that in almost all cases, the global name space is +the name space of the containing module --- scopes in Python do not +nest! + +\begin{center} +\begin{tabular}{|l|l|l|l|} +\hline +Code block type & Global name space & Local name space & Notes \\ +\hline +Module & n.s. for this module & same as global & \\ +Script & n.s. for \module{__main__} & same as global & \\ +Interactive command & n.s. for \module{__main__} & same as global & \\ +Class definition & global n.s. of containing block & new n.s. & \\ +Function body & global n.s. of containing block & new n.s. & (2) \\ +String passed to \keyword{exec} statement + & global n.s. of containing block + & local n.s. of containing block & (1) \\ +String passed to \function{eval()} + & global n.s. of caller & local n.s. of caller & (1) \\ +File read by \function{execfile()} + & global n.s. of caller & local n.s. of caller & (1) \\ +Expression read by \function{input()} + & global n.s. of caller & local n.s. of caller & \\ +\hline +\end{tabular} +\end{center} +\refbimodindex{__main__} + +Notes: + +\begin{description} + +\item[n.s.] means {\em name space} + +\item[(1)] The global and local name space for these can be +overridden with optional extra arguments. + +\item[(2)] The body of lambda forms (see section \ref{lambda}) is +treated exactly the same as a (nested) function definition. Lambda +forms have their own name space consisting of their formal arguments. +\indexii{lambda}{form} + +\end{description} + +The built-in functions \function{globals()} and \function{locals()} return a +dictionary representing the current global and local name space, +respectively.
The effect of modifications to this dictionary on the +name space is undefined.% +\footnote{The current implementations return the dictionary actually +used to implement the name space, {\em except} for functions, where +the optimizer may cause the local name space to be implemented +differently, and \function{locals()} returns a read-only dictionary.} + +\section{Exceptions} + +Exceptions are a means of breaking out of the normal flow of control +of a code block in order to handle errors or other exceptional +conditions. An exception is {\em raised} at the point where the error +is detected; it may be {\em handled} by the surrounding code block or +by any code block that directly or indirectly invoked the code block +where the error occurred. +\index{exception} +\index{raise an exception} +\index{handle an exception} +\index{exception handler} +\index{errors} +\index{error handling} + +The Python interpreter raises an exception when it detects a run-time +error (such as division by zero). A Python program can also +explicitly raise an exception with the \keyword{raise} statement. +Exception handlers are specified with the \keyword{try} ... \keyword{except} +statement. + +Python uses the ``termination'' model of error handling: an exception +handler can find out what happened and continue execution at an outer +level, but it cannot repair the cause of the error and retry the +failing operation (except by re-entering the offending piece of +code from the top). + +When an exception is not handled at all, the interpreter terminates +execution of the program, or returns to its interactive main loop. + +Exceptions are identified by string objects or class instances. Two +different string objects with the same value identify different +exceptions. An exception can be raised with a class instance. Such +exceptions are caught by specifying an except clause that has the +class name (or a base class) as the condition.
+ +When an exception is raised, an object (maybe \code{None}) is passed +as the exception's ``parameter''; this object does not affect the +selection of an exception handler, but is passed to the selected +exception handler as additional information. For exceptions raised +with a class instance, the instance is passed as the ``parameter''. + +For example: + +\begin{verbatim} +>>> class Error: +... def __init__(self, msg): self.msg = msg +... +>>> class SpecificError(Error): pass +... +>>> try: +... raise SpecificError('broken') +... except Error, obj: +... print obj.msg +... +broken +\end{verbatim} + +See also the description of the \keyword{try} and \keyword{raise} +statements. diff --git a/Doc/ref5.tex b/Doc/ref5.tex new file mode 100644 index 0000000..b2fea3c --- /dev/null +++ b/Doc/ref5.tex @@ -0,0 +1,759 @@ +\chapter{Expressions and conditions} +\index{expression} +\index{condition} + +{\bf Note:} In this and the following chapters, extended BNF notation +will be used to describe syntax, not lexical analysis. +\index{BNF} + +This chapter explains the meaning of the elements of expressions and +conditions. Conditions are a superset of expressions, and a condition +may be used wherever an expression is required by enclosing it in +parentheses. The only places where expressions are used in the syntax +instead of conditions is in expression statements and on the +right-hand side of assignment statements; this catches some nasty bugs +like accidentally writing \verb@x == 1@ instead of \verb@x = 1@. +\indexii{assignment}{statement} + +The comma plays several roles in Python's syntax. It is usually an +operator with a lower precedence than all others, but occasionally +serves other purposes as well; e.g. it separates function arguments, +is used in list and dictionary constructors, and has special semantics +in \verb@print@ statements. 
+\index{comma} + +When (one alternative of) a syntax rule has the form + +\begin{verbatim} +name: othername +\end{verbatim} + +and no semantics are given, the semantics of this form of \verb@name@ +are the same as for \verb@othername@. +\index{syntax} + +\section{Arithmetic conversions} +\indexii{arithmetic}{conversion} + +When a description of an arithmetic operator below uses the phrase +``the numeric arguments are converted to a common type'', +this both means that if either argument is not a number, a +\verb@TypeError@ exception is raised, and that otherwise +the following conversions are applied: +\exindex{TypeError} +\indexii{floating point}{number} +\indexii{long}{integer} +\indexii{plain}{integer} + +\begin{itemize} +\item first, if either argument is a floating point number, + the other is converted to floating point; +\item else, if either argument is a long integer, + the other is converted to long integer; +\item otherwise, both must be plain integers and no conversion + is necessary. +\end{itemize} + +\section{Atoms} +\index{atom} + +Atoms are the most basic elements of expressions. Forms enclosed in +reverse quotes or in parentheses, brackets or braces are also +categorized syntactically as atoms. The syntax for atoms is: + +\begin{verbatim} +atom: identifier | literal | enclosure +enclosure: parenth_form|list_display|dict_display|string_conversion +\end{verbatim} + +\subsection{Identifiers (Names)} +\index{name} +\index{identifier} + +An identifier occurring as an atom is a reference to a local, global +or built-in name binding. If a name is assigned to anywhere in a code +block (even in unreachable code), and is not mentioned in a +\verb@global@ statement in that code block, then it refers to a local +name throughout that code block. 
When it is not assigned to anywhere +in the block, or when it is assigned to but also explicitly listed in +a \verb@global@ statement, it refers to a global name if one exists, +else to a built-in name (and this binding may dynamically change). +\indexii{name}{binding} +\index{code block} +\stindex{global} +\indexii{built-in}{name} +\indexii{global}{name} + +When the name is bound to an object, evaluation of the atom yields +that object. When a name is not bound, an attempt to evaluate it +raises a \verb@NameError@ exception. +\exindex{NameError} + +\subsection{Literals} +\index{literal} + +Python knows string and numeric literals: + +\begin{verbatim} +literal: stringliteral | integer | longinteger | floatnumber +\end{verbatim} + +Evaluation of a literal yields an object of the given type (string, +integer, long integer, floating point number) with the given value. +The value may be approximated in the case of floating point literals. +See section \ref{literals} for details. + +All literals correspond to immutable data types, and hence the +object's identity is less important than its value. Multiple +evaluations of literals with the same value (either the same +occurrence in the program text or a different occurrence) may obtain +the same object or a different object with the same value. +\indexiii{immutable}{data}{type} + +(In the original implementation, all literals in the same code block +with the same type and value yield the same object.) + +\subsection{Parenthesized forms} +\index{parenthesized form} + +A parenthesized form is an optional condition list enclosed in +parentheses: + +\begin{verbatim} +parenth_form: "(" [condition_list] ")" +\end{verbatim} + +A parenthesized condition list yields whatever that condition list +yields. + +An empty pair of parentheses yields an empty tuple object. Since +tuples are immutable, the rules for literals apply here. 
+\indexii{empty}{tuple} + +(Note that tuples are not formed by the parentheses, but rather by use +of the comma operator. The exception is the empty tuple, for which +parentheses {\em are} required --- allowing unparenthesized ``nothing'' +in expressions would cause ambiguities and allow common typos to +pass uncaught.) +\index{comma} +\indexii{tuple}{display} + +\subsection{List displays} +\indexii{list}{display} + +A list display is a possibly empty series of conditions enclosed in +square brackets: + +\begin{verbatim} +list_display: "[" [condition_list] "]" +\end{verbatim} + +A list display yields a new list object. +\obindex{list} + +If it has no condition list, the list object has no items. Otherwise, +the elements of the condition list are evaluated from left to right +and inserted in the list object in that order. +\indexii{empty}{list} + +\subsection{Dictionary displays} \label{dict} +\indexii{dictionary}{display} + +A dictionary display is a possibly empty series of key/datum pairs +enclosed in curly braces: +\index{key} +\index{datum} +\index{key/datum pair} + +\begin{verbatim} +dict_display: "{" [key_datum_list] "}" +key_datum_list: key_datum ("," key_datum)* [","] +key_datum: condition ":" condition +\end{verbatim} + +A dictionary display yields a new dictionary object. +\obindex{dictionary} + +The key/datum pairs are evaluated from left to right to define the +entries of the dictionary: each key object is used as a key into the +dictionary to store the corresponding datum. + +Restrictions on the types of the key values are listed earlier in +section \ref{types}. +Clashes between duplicate keys are not detected; the last +datum (textually rightmost in the display) stored for a given key +value prevails. 
+\exindex{TypeError} + +\subsection{String conversions} +\indexii{string}{conversion} +\indexii{reverse}{quotes} +\indexii{backward}{quotes} +\index{back-quotes} + +A string conversion is a condition list enclosed in reverse (or +backward) quotes: + +\begin{verbatim} +string_conversion: "`" condition_list "`" +\end{verbatim} + +A string conversion evaluates the contained condition list and +converts the resulting object into a string according to rules +specific to its type. + +If the object is a string, a number, \verb@None@, or a tuple, list or +dictionary containing only objects whose type is one of these, the +resulting string is a valid Python expression which can be passed to +the built-in function \verb@eval()@ to yield an object with the +same value (or an approximation, if floating point numbers are +involved). + +(In particular, converting a string adds quotes around it and converts +``funny'' characters to escape sequences that are safe to print.) + +It is illegal to attempt to convert recursive objects (e.g. lists or +dictionaries that contain a reference to themselves, directly or +indirectly). +\obindex{recursive} + +The built-in function \verb@repr()@ performs exactly the same +conversion on its argument as enclosing it in reverse quotes does. +The built-in function \verb@str()@ performs a similar but more +user-friendly conversion. +\bifuncindex{repr} +\bifuncindex{str} + +\section{Primaries} \label{primaries} +\index{primary} + +Primaries represent the most tightly bound operations of the language. +Their syntax is: + +\begin{verbatim} +primary: atom | attributeref | subscription | slicing | call +\end{verbatim} + +\subsection{Attribute references} +\indexii{attribute}{reference} + +An attribute reference is a primary followed by a period and a name: + +\begin{verbatim} +attributeref: primary "." identifier +\end{verbatim} + +The primary must evaluate to an object of a type that supports +attribute references, e.g. a module or a list.
This object is then +asked to produce the attribute whose name is the identifier. If this +attribute is not available, the exception \verb@AttributeError@ is +raised. Otherwise, the type and value of the object produced is +determined by the object. Multiple evaluations of the same attribute +reference may yield different objects. +\obindex{module} +\obindex{list} + +\subsection{Subscriptions} +\index{subscription} + +A subscription selects an item of a sequence (string, tuple or list) +or mapping (dictionary) object: +\obindex{sequence} +\obindex{mapping} +\obindex{string} +\obindex{tuple} +\obindex{list} +\obindex{dictionary} +\indexii{sequence}{item} + +\begin{verbatim} +subscription: primary "[" condition "]" +\end{verbatim} + +The primary must evaluate to an object of a sequence or mapping type. + +If it is a mapping, the condition must evaluate to an object whose +value is one of the keys of the mapping, and the subscription selects +the value in the mapping that corresponds to that key. + +If it is a sequence, the condition must evaluate to a plain integer. +If this value is negative, the length of the sequence is added to it +(so that, e.g. \verb@x[-1]@ selects the last item of \verb@x@.) +The resulting value must be a nonnegative integer smaller than the +number of items in the sequence, and the subscription selects the item +whose index is that value (counting from zero). + +A string's items are characters. A character is not a separate data +type but a string of exactly one character. +\index{character} +\indexii{string}{item} + +\subsection{Slicings} +\index{slicing} +\index{slice} + +A slicing (or slice) selects a range of items in a sequence (string, +tuple or list) object: +\obindex{sequence} +\obindex{string} +\obindex{tuple} +\obindex{list} + +\begin{verbatim} +slicing: primary "[" [condition] ":" [condition] "]" +\end{verbatim} + +The primary must evaluate to a sequence object. 
The lower and upper +bound expressions, if present, must evaluate to plain integers; +defaults are zero and the sequence's length, respectively. If either +bound is negative, the sequence's length is added to it. The slicing +now selects all items with index \var{k} such that +\code{\var{i} <= \var{k} < \var{j}} where \var{i} +and \var{j} are the specified lower and upper bounds. This may be an +empty sequence. It is not an error if \var{i} or \var{j} lie outside the +range of valid indexes (such items don't exist so they aren't +selected). + +\subsection{Calls} \label{calls} +\index{call} + +A call calls a callable object (e.g. a function) with a possibly empty +series of arguments:\footnote{The new syntax for keyword arguments is +not yet documented in this manual. See chapter 12 of the Tutorial.} +\obindex{callable} + +\begin{verbatim} +call: primary "(" [condition_list] ")" +\end{verbatim} + +The primary must evaluate to a callable object (user-defined +functions, built-in functions, methods of built-in objects, class +objects, and methods of class instances are callable). If it is a +class, the argument list must be empty; otherwise, the arguments are +evaluated. + +A call always returns some value, possibly \verb@None@, unless it +raises an exception. How this value is computed depends on the type +of the callable object. If it is: + +\begin{description} + +\item[a user-defined function:] the code block for the function is +executed, passing it the argument list. The first thing the code +block will do is bind the formal parameters to the arguments; this is +described in section \ref{function}. When the code block executes a +\verb@return@ statement, this specifies the return value of the +function call. 
+\indexii{function}{call} +\indexiii{user-defined}{function}{call} +\obindex{user-defined function} +\obindex{function} + +\item[a built-in function or method:] the result is up to the +interpreter; see the library reference manual for the descriptions of +built-in functions and methods. +\indexii{function}{call} +\indexii{built-in function}{call} +\indexii{method}{call} +\indexii{built-in method}{call} +\obindex{built-in method} +\obindex{built-in function} +\obindex{method} +\obindex{function} + +\item[a class object:] a new instance of that class is returned. +\obindex{class} +\indexii{class object}{call} + +\item[a class instance method:] the corresponding user-defined +function is called, with an argument list that is one longer than the +argument list of the call: the instance becomes the first argument. +\obindex{class instance} +\obindex{instance} +\indexii{instance}{call} +\indexii{class instance}{call} + +\end{description} + +\section{Unary arithmetic operations} +\indexiii{unary}{arithmetic}{operation} +\indexiii{unary}{bit-wise}{operation} + +All unary arithmetic (and bit-wise) operations have the same priority: + +\begin{verbatim} +u_expr: primary | "-" u_expr | "+" u_expr | "~" u_expr +\end{verbatim} + +The unary \verb@"-"@ (minus) operator yields the negation of its +numeric argument. +\index{negation} +\index{minus} + +The unary \verb@"+"@ (plus) operator yields its numeric argument +unchanged. +\index{plus} + +The unary \verb@"~"@ (invert) operator yields the bit-wise inversion +of its plain or long integer argument. The bit-wise inversion of +\verb@x@ is defined as \verb@-(x+1)@. +\index{inversion} + +In all three cases, if the argument does not have the proper type, +a \verb@TypeError@ exception is raised. +\exindex{TypeError} + +\section{Binary arithmetic operations} +\indexiii{binary}{arithmetic}{operation} + +The binary arithmetic operations have the conventional priority +levels. 
Note that some of these operations also apply to certain +non-numeric types. There is no ``power'' operator, so there are only +two levels, one for multiplicative operators and one for additive +operators: + +\begin{verbatim} +m_expr: u_expr | m_expr "*" u_expr + | m_expr "/" u_expr | m_expr "%" u_expr +a_expr: m_expr | a_expr "+" m_expr | a_expr "-" m_expr +\end{verbatim} + +The \verb@"*"@ (multiplication) operator yields the product of its +arguments. The arguments must either both be numbers, or one argument +must be a plain integer and the other must be a sequence. In the +former case, the numbers are converted to a common type and then +multiplied together. In the latter case, sequence repetition is +performed; a negative repetition factor yields an empty sequence. +\index{multiplication} + +The \verb@"/"@ (division) operator yields the quotient of its +arguments. The numeric arguments are first converted to a common +type. Plain or long integer division yields an integer of the same +type; the result is that of mathematical division with the `floor' +function applied to the result. Division by zero raises the +\verb@ZeroDivisionError@ exception. +\exindex{ZeroDivisionError} +\index{division} + +The \verb@"%"@ (modulo) operator yields the remainder from the +division of the first argument by the second. The numeric arguments +are first converted to a common type. A zero right argument raises +the \verb@ZeroDivisionError@ exception. The arguments may be floating +point numbers, e.g. \verb@3.14 % 0.7@ equals \verb@0.34@. The modulo +operator always yields a result with the same sign as its second +operand (or zero); the absolute value of the result is strictly +smaller than the absolute value of the second operand. +\index{modulo} + +The integer division and modulo operators are connected by the +following identity: \verb@x == (x/y)*y + (x%y)@. Integer division and +modulo are also connected with the built-in function \verb@divmod()@: +\verb@divmod(x, y) == (x/y, x%y)@.
These identities don't hold for +floating point numbers; there a similar identity holds where +\verb@x/y@ is replaced by \verb@floor(x/y)@. + +The \verb@"+"@ (addition) operator yields the sum of its arguments. +The arguments must either both be numbers, or both sequences of the +same type. In the former case, the numbers are converted to a common +type and then added together. In the latter case, the sequences are +concatenated. +\index{addition} + +The \verb@"-"@ (subtraction) operator yields the difference of its +arguments. The numeric arguments are first converted to a common +type. +\index{subtraction} + +\section{Shifting operations} +\indexii{shifting}{operation} + +The shifting operations have lower priority than the arithmetic +operations: + +\begin{verbatim} +shift_expr: a_expr | shift_expr ( "<<" | ">>" ) a_expr +\end{verbatim} + +These operators accept plain or long integers as arguments. The +arguments are converted to a common type. They shift the first +argument to the left or right by the number of bits given by the +second argument. + +A right shift by \var{n} bits is defined as division by +\code{pow(2,\var{n})}. A left shift by \var{n} bits is defined as +multiplication with \code{pow(2,\var{n})}; for plain integers there is +no overflow check so this drops bits and flips the sign if the result +is not less than \code{pow(2,31)} in absolute value. + +Negative shift counts raise a \verb@ValueError@ exception. +\exindex{ValueError} + +\section{Binary bit-wise operations} +\indexiii{binary}{bit-wise}{operation} + +Each of the three bitwise operations has a different priority level: + +\begin{verbatim} +and_expr: shift_expr | and_expr "&" shift_expr +xor_expr: and_expr | xor_expr "^" and_expr +or_expr: xor_expr | or_expr "|" xor_expr +\end{verbatim} + +The \verb@"&"@ operator yields the bitwise AND of its arguments, which +must be plain or long integers. The arguments are converted to a +common type.
+\indexii{bit-wise}{and} + +The \verb@"^"@ operator yields the bitwise XOR (exclusive OR) of its +arguments, which must be plain or long integers. The arguments are +converted to a common type. +\indexii{bit-wise}{xor} +\indexii{exclusive}{or} + +The \verb@"|"@ operator yields the bitwise (inclusive) OR of its +arguments, which must be plain or long integers. The arguments are +converted to a common type. +\indexii{bit-wise}{or} +\indexii{inclusive}{or} + +\section{Comparisons} +\index{comparison} + +Contrary to C, all comparison operations in Python have the same +priority, which is lower than that of any arithmetic, shifting or +bitwise operation. Also contrary to C, expressions like +\verb@a < b < c@ have the interpretation that is conventional in +mathematics: +\index{C} + +\begin{verbatim} +comparison: or_expr (comp_operator or_expr)* +comp_operator: "<"|">"|"=="|">="|"<="|"<>"|"!="|"is" ["not"]|["not"] "in" +\end{verbatim} + +Comparisons yield integer values: 1 for true, 0 for false. + +Comparisons can be chained arbitrarily, e.g. \code{x < y <= z} is +equivalent to \code{x < y and y <= z}, except that \code{y} is +evaluated only once (but in both cases \code{z} is not evaluated at all +when \code{x < y} is found to be false). +\indexii{chaining}{comparisons} + +Formally, if \var{a}, \var{b}, \var{c}, \ldots, \var{y}, \var{z} are +expressions and \var{opa}, \var{opb}, \ldots, \var{opy} are comparison +operators, then \var{a opa b opb c} \ldots \var{y opy z} is equivalent +to \var{a opa b} \code{and} \var{b opb c} \code{and} \ldots \code{and} +\var{y opy z}, except that each expression is evaluated at most once. + +Note that \var{a opa b opb c} doesn't imply any kind of comparison +between \var{a} and \var{c}, so that e.g.\ \code{x < y > z} is +perfectly legal (though perhaps not pretty). + +The forms \verb@<>@ and \verb@!=@ are equivalent; for consistency with +C, \verb@!=@ is preferred; where \verb@!=@ is mentioned below +\verb@<>@ is also implied. 
+ +The operators {\tt "<", ">", "==", ">=", "<="}, and {\tt "!="} compare +the values of two objects. The objects needn't have the same type. +If both are numbers, they are converted to a common type. Otherwise, +objects of different types {\em always} compare unequal, and are +ordered consistently but arbitrarily. + +(This unusual definition of comparison is done to simplify the +definition of operations like sorting and the \verb@in@ and +\verb@not@ \verb@in@ operators.) + +Comparison of objects of the same type depends on the type: + +\begin{itemize} + +\item +Numbers are compared arithmetically. + +\item +Strings are compared lexicographically using the numeric equivalents +(the result of the built-in function \verb@ord@) of their characters. + +\item +Tuples and lists are compared lexicographically using comparison of +corresponding items. + +\item +Mappings (dictionaries) are compared through lexicographic +comparison of their sorted (key, value) lists.% +\footnote{This is expensive since it requires sorting the keys first, +but about the only sensible definition. An earlier version of Python +compared dictionaries by identity only, but this caused surprises +because people expected to be able to test a dictionary for emptiness +by comparing it to {\tt \{\}}.} + +\item +Most other types compare unequal unless they are the same object; +the choice whether one object is considered smaller or larger than +another one is made arbitrarily but consistently within one +execution of a program. + +\end{itemize} + +The operators \verb@in@ and \verb@not in@ test for sequence +membership: if \var{y} is a sequence, \code{\var{x} in \var{y}} is +true if and only if there exists an index \var{i} such that +\code{\var{x} == \var{y}[\var{i}]}. +\code{\var{x} not in \var{y}} yields the inverse truth value.
The +exception \verb@TypeError@ is raised when \var{y} is not a sequence, +or when \var{y} is a string and \var{x} is not a string of length one.% +\footnote{The latter restriction is sometimes a nuisance.} +\opindex{in} +\opindex{not in} +\indexii{membership}{test} +\obindex{sequence} + +The operators \verb@is@ and \verb@is not@ test for object identity: +\var{x} \code{is} \var{y} is true if and only if \var{x} and \var{y} +are the same object. \var{x} \code{is not} \var{y} yields the inverse +truth value. +\opindex{is} +\opindex{is not} +\indexii{identity}{test} + +\section{Boolean operations} \label{Booleans} +\indexii{Boolean}{operation} + +Boolean operations have the lowest priority of all Python operations: + +\begin{verbatim} +condition: or_test | lambda_form +or_test: and_test | or_test "or" and_test +and_test: not_test | and_test "and" not_test +not_test: comparison | "not" not_test +lambda_form: "lambda" [parameter_list] ":" condition +\end{verbatim} + +In the context of Boolean operations, and also when conditions are +used by control flow statements, the following values are interpreted +as false: \verb@None@, numeric zero of all types, empty sequences +(strings, tuples and lists), and empty mappings (dictionaries). All +other values are interpreted as true. + +The operator \verb@not@ yields 1 if its argument is false, 0 otherwise. +\opindex{not} + +The condition \var{x} \verb@and@ \var{y} first evaluates \var{x}; if +\var{x} is false, its value is returned; otherwise, \var{y} is +evaluated and the resulting value is returned. +\opindex{and} + +The condition \var{x} \verb@or@ \var{y} first evaluates \var{x}; if +\var{x} is true, its value is returned; otherwise, \var{y} is +evaluated and the resulting value is returned. +\opindex{or} + +(Note that \verb@and@ and \verb@or@ do not restrict the value and type +they return to 0 and 1, but rather return the last evaluated argument. +This is sometimes useful, e.g.
if \verb@s@ is a string that should be +replaced by a default value if it is empty, the expression +\verb@s or 'foo'@ yields the desired value. Because \verb@not@ has to +invent a value anyway, it does not bother to return a value of the +same type as its argument, so e.g. \verb@not 'foo'@ yields \verb@0@, +not \verb@''@.) + +Lambda forms (lambda expressions) have the same syntactic position as +conditions. They are a shorthand to create anonymous functions; the +expression {\em {\tt lambda} arguments{\tt :} condition} +yields a function object that behaves virtually identically to one +defined with +{\em {\tt def} name {\tt (}arguments{\tt ): return} condition}. +See section \ref{function} for the syntax of +parameter lists. Note that functions created with lambda forms cannot +contain statements. +\label{lambda} +\indexii{lambda}{expression} +\indexii{lambda}{form} +\indexii{anonymous}{function} + +\section{Expression lists and condition lists} +\indexii{expression}{list} +\indexii{condition}{list} + +\begin{verbatim} +expression_list: or_expr ("," or_expr)* [","] +condition_list: condition ("," condition)* [","] +\end{verbatim} + +The only difference between expression lists and condition lists is +the lowest priority of operators that can be used in them without +being enclosed in parentheses; condition lists allow all operators, +while expression lists don't allow comparisons and Boolean operators +(they do allow bitwise and shift operators though). + +Expression lists are used in expression statements and assignments; +condition lists are used everywhere else where a list of +comma-separated values is required. + +An expression (condition) list containing at least one comma yields a +tuple. The length of the tuple is the number of expressions +(conditions) in the list. The expressions (conditions) are evaluated +from left to right.
(Condition lists are used syntactically in a few +places where no tuple is constructed but a list of values is needed +nevertheless.) +\obindex{tuple} + +The trailing comma is required only to create a single-element tuple (a.k.a. a +{\em singleton}); it is optional in all other cases. A single +expression (condition) without a trailing comma doesn't create a +tuple, but rather yields the value of that expression (condition). +\indexii{trailing}{comma} + +(To create an empty tuple, use an empty pair of parentheses: +\verb@()@.) + +\section{Summary} + +The following table summarizes the operator precedences in Python, +from lowest precedence (least binding) to highest precedence (most +binding). Operators in the same box have the same precedence. Unless +the syntax is explicitly given, operators are binary. Operators in +the same box group left to right (except for comparisons, which +chain from left to right --- see above). + +\begin{center} +\begin{tabular}{|c|c|} +\hline +\code{or} & Boolean OR \\ +\hline +\code{and} & Boolean AND \\ +\hline +\code{not} \var{x} & Boolean NOT \\ +\hline +\code{in}, \code{not} \code{in} & Membership tests \\ +\code{is}, \code{is} \code{not} & Identity tests \\ +\code{<}, \code{<=}, \code{>}, \code{>=}, \code{<>}, \code{!=}, \code{==} & + Comparisons \\ +\hline +\code{|} & Bitwise OR \\ +\hline +\code{\^} & Bitwise XOR \\ +\hline +\code{\&} & Bitwise AND \\ +\hline +\code{<<}, \code{>>} & Shifts \\ +\hline +\code{+}, \code{-} & Addition and subtraction \\ +\hline +\code{*}, \code{/}, \code{\%} & Multiplication, division, remainder \\ +\hline +\code{+\var{x}}, \code{-\var{x}} & Positive, negative \\ +\code{\~\var{x}} & Bitwise not \\ +\hline +\code{\var{x}.\var{attribute}} & Attribute reference \\ +\code{\var{x}[\var{index}]} & Subscription \\ +\code{\var{x}[\var{index}:\var{index}]} & Slicing \\ +\code{\var{f}(\var{arguments}...)} & Function call \\ +\hline +\code{(\var{expressions}\ldots)} & Binding or tuple display \\
+\code{[\var{expressions}\ldots]} & List display \\ +\code{\{\var{key}:\var{datum}\ldots\}} & Dictionary display \\ +\code{`\var{expression}\ldots`} & String conversion \\ +\hline +\end{tabular} +\end{center} diff --git a/Doc/ref6.tex b/Doc/ref6.tex new file mode 100644 index 0000000..20c65f2 --- /dev/null +++ b/Doc/ref6.tex @@ -0,0 +1,512 @@ +\chapter{Simple statements} +\indexii{simple}{statement} + +Simple statements are comprised within a single logical line. +Several simple statements may occur on a single line separated +by semicolons. The syntax for simple statements is: + +\begin{verbatim} +simple_stmt: expression_stmt + | assignment_stmt + | pass_stmt + | del_stmt + | print_stmt + | return_stmt + | raise_stmt + | break_stmt + | continue_stmt + | import_stmt + | global_stmt + | exec_stmt +\end{verbatim} + +\section{Expression statements} +\indexii{expression}{statement} + +Expression statements are used (mostly interactively) to compute and +write a value, or (usually) to call a procedure (a function that +returns no meaningful result; in Python, procedures return the value +\code{None}): + +\begin{verbatim} +expression_stmt: condition_list +\end{verbatim} + +An expression statement evaluates the condition list (which may be a +single condition). +\indexii{expression}{list} + +In interactive mode, if the value is not \code{None}, it is converted +to a string using the rules for string conversions (expressions in +reverse quotes), and the resulting string is written to standard +output (see section \ref{print}) on a line by itself. +(The exception for \code{None} is made so that procedure calls, which +are syntactically equivalent to expressions, do not cause any output.) 
+\ttindex{None} +\indexii{string}{conversion} +\index{output} +\indexii{standard}{output} +\indexii{writing}{values} +\indexii{procedure}{call} + +\section{Assignment statements} +\indexii{assignment}{statement} + +Assignment statements are used to (re)bind names to values and to +modify attributes or items of mutable objects: +\indexii{binding}{name} +\indexii{rebinding}{name} +\obindex{mutable} +\indexii{attribute}{assignment} + +\begin{verbatim} +assignment_stmt: (target_list "=")+ expression_list +target_list: target ("," target)* [","] +target: identifier | "(" target_list ")" | "[" target_list "]" + | attributeref | subscription | slicing +\end{verbatim} + +(See section \ref{primaries} for the syntax definitions for the last +three symbols.) + +An assignment statement evaluates the expression list (remember that +this can be a single expression or a comma-separated list, the latter +yielding a tuple) and assigns the single resulting object to each of +the target lists, from left to right. +\indexii{expression}{list} + +Assignment is defined recursively depending on the form of the target +(list). When a target is part of a mutable object (an attribute +reference, subscription or slicing), the mutable object must +ultimately perform the assignment and decide about its validity, and +may raise an exception if the assignment is unacceptable. The rules +observed by various types and the exceptions raised are given with the +definition of the object types (see section \ref{types}). +\index{target} +\indexii{target}{list} + +Assignment of an object to a target list is recursively defined as +follows. +\indexiii{target}{list}{assignment} + +\begin{itemize} +\item +If the target list is a single target: the object is assigned to that +target. 
+ +\item +If the target list is a comma-separated list of targets: the object +must be a tuple with the same number of items as the list contains +targets, and the items are assigned, from left to right, to the +corresponding targets. + +\end{itemize} + +Assignment of an object to a single target is recursively defined as +follows. + +\begin{itemize} % nested + +\item +If the target is an identifier (name): + +\begin{itemize} + +\item +If the name does not occur in a \keyword{global} statement in the current +code block: the name is bound to the object in the current local name +space. +\stindex{global} + +\item +Otherwise: the name is bound to the object in the current global name +space. + +\end{itemize} % nested + +The name is rebound if it was already bound. + +\item +If the target is a target list enclosed in parentheses: the object is +assigned to that target list as described above. + +\item +If the target is a target list enclosed in square brackets: the object +must be a list with the same number of items as the target list +contains targets, and its items are assigned, from left to right, to +the corresponding targets. + +\item +If the target is an attribute reference: The primary expression in the +reference is evaluated. It should yield an object with assignable +attributes; if this is not the case, \exception{TypeError} is raised. That +object is then asked to assign the assigned object to the given +attribute; if it cannot perform the assignment, it raises an exception +(usually but not necessarily \exception{AttributeError}). +\indexii{attribute}{assignment} + +\item +If the target is a subscription: The primary expression in the +reference is evaluated. It should yield either a mutable sequence +(list) object or a mapping (dictionary) object. Next, the subscript +expression is evaluated. +\indexii{subscription}{assignment} +\obindex{mutable} + +If the primary is a mutable sequence object (a list), the subscript +must yield a plain integer. 
If it is negative, the sequence's length +is added to it. The resulting value must be a nonnegative integer +less than the sequence's length, and the sequence is asked to assign +the assigned object to its item with that index. If the index is out +of range, \exception{IndexError} is raised (assignment to a subscripted +sequence cannot add new items to a list). +\obindex{sequence} +\obindex{list} + +If the primary is a mapping (dictionary) object, the subscript must +have a type compatible with the mapping's key type, and the mapping is +then asked to create a key/datum pair which maps the subscript to +the assigned object. This can either replace an existing key/datum +pair with the same key value, or insert a new key/datum pair (if no +key with the same value existed). +\obindex{mapping} +\obindex{dictionary} + +\item +If the target is a slicing: The primary expression in the reference is +evaluated. It should yield a mutable sequence object (e.g. a list). The +assigned object should be a sequence object of the same type. Next, +the lower and upper bound expressions are evaluated, insofar as they are +present; defaults are zero and the sequence's length. The bounds +should evaluate to (small) integers. If either bound is negative, the +sequence's length is added to it. The resulting bounds are clipped to +lie between zero and the sequence's length, inclusive. Finally, the +sequence object is asked to replace the slice with the items of the +assigned sequence. The length of the slice may be different from the +length of the assigned sequence, thus changing the length of the +target sequence, if the object allows it. +\indexii{slicing}{assignment} + +\end{itemize} + +(In the current implementation, the syntax for targets is taken +to be the same as for expressions, and invalid syntax is rejected +during the code generation phase, causing less detailed error +messages.)
+ +WARNING: Although the definition of assignment implies that overlaps +between the left-hand side and the right-hand side are `safe' (e.g. +\verb@a, b = b, a@ swaps two variables), overlaps within the +collection of assigned-to variables are not safe! For instance, the +following program prints \code{[0, 2]}: + +\begin{verbatim} +x = [0, 1] +i = 0 +i, x[i] = 1, 2 +print x +\end{verbatim} + + +\section{The {\tt pass} statement} +\stindex{pass} + +\begin{verbatim} +pass_stmt: "pass" +\end{verbatim} + +\keyword{pass} is a null operation --- when it is executed, nothing +happens. It is useful as a placeholder when a statement is +required syntactically, but no code needs to be executed, for example: +\indexii{null}{operation} + +\begin{verbatim} +def f(arg): pass # a function that does nothing (yet) + +class C: pass # a class with no methods (yet) +\end{verbatim} + +\section{The {\tt del} statement} +\stindex{del} + +\begin{verbatim} +del_stmt: "del" target_list +\end{verbatim} + +Deletion is recursively defined very similar to the way assignment is +defined. Rather than spelling it out in full details, here are some +hints. +\indexii{deletion}{target} +\indexiii{deletion}{target}{list} + +Deletion of a target list recursively deletes each target, from left +to right. + +Deletion of a name removes the binding of that name (which must exist) +from the local or global name space, depending on whether the name +occurs in a \keyword{global} statement in the same code block. +\stindex{global} +\indexii{unbinding}{name} + +Deletion of attribute references, subscriptions and slicings +is passed to the primary object involved; deletion of a slicing +is in general equivalent to assignment of an empty slice of the +right type (but even this is determined by the sliced object).
+\indexii{attribute}{deletion} + +\section{The {\tt print} statement} \label{print} +\stindex{print} + +\begin{verbatim} +print_stmt: "print" [ condition ("," condition)* [","] ] +\end{verbatim} + +\keyword{print} evaluates each condition in turn and writes the resulting +object to standard output (see below). If an object is not a string, +it is first converted to a string using the rules for string +conversions. The (resulting or original) string is then written. A +space is written before each object is (converted and) written, unless +the output system believes it is positioned at the beginning of a +line. This is the case: (1) when no characters have yet been written +to standard output; or (2) when the last character written to standard +output is \verb/\n/; or (3) when the last write operation on standard +output was not a \keyword{print} statement. (In some cases it may be +functional to write an empty string to standard output for this +reason.) +\index{output} +\indexii{writing}{values} + +A \verb/"\n"/ character is written at the end, unless the \keyword{print} +statement ends with a comma. This is the only action if the statement +contains just the keyword \keyword{print}. +\indexii{trailing}{comma} +\indexii{newline}{suppression} + +Standard output is defined as the file object named \verb@stdout@ +in the built-in module \verb@sys@. If no such object exists, +or if it is not a writable file, a \exception{RuntimeError} exception is raised. +(The original implementation attempts to write to the system's original +standard output instead, but this is not safe, and should be fixed.) +\indexii{standard}{output} +\refbimodindex{sys} +\ttindex{stdout} +\exindex{RuntimeError} + +\section{The {\tt return} statement} +\stindex{return} + +\begin{verbatim} +return_stmt: "return" [condition_list] +\end{verbatim} + +\keyword{return} may only occur syntactically nested in a function +definition, not within a nested class definition. 
+\indexii{function}{definition} +\indexii{class}{definition} + +If a condition list is present, it is evaluated, else \code{None} +is substituted. + +\keyword{return} leaves the current function call with the condition +list (or \code{None}) as return value. + +When \keyword{return} passes control out of a \keyword{try} statement +with a finally clause, that finally clause is executed +before really leaving the function. +\kwindex{finally} + +\section{The {\tt raise} statement} +\stindex{raise} + +\begin{verbatim} +raise_stmt: "raise" condition ["," condition ["," condition]] +\end{verbatim} + +\keyword{raise} evaluates its first condition, which must yield +a string, class, or instance object. If there is a second condition, +this is evaluated, else \code{None} is substituted. If the first +condition is a class object, then the second condition must be an +instance of that class or one of its derivatives. If the first +condition is an instance object, the second condition must be +\code{None}. +\index{exception} +\indexii{raising}{exception} + +If the first object is a class or string, it then raises the exception +identified by the first object, with the second one (or \code{None}) +as its parameter. If the first object is an instance, it raises the +exception identified by the class of the object, with the instance as +its parameter (and there should be no second object, or the second +object should be \code{None}). + +If a third object is present, and it is not \code{None}, it should be +a traceback object (see section \ref{traceback}), and it is +substituted instead of the current location as the place where the +exception occurred. This is useful to re-raise an exception +transparently in an except clause.
+\obindex{traceback} + +\section{The {\tt break} statement} +\stindex{break} + +\begin{verbatim} +break_stmt: "break" +\end{verbatim} + +\keyword{break} may only occur syntactically nested in a \keyword{for} +or \keyword{while} loop, but not nested in a function or class definition +within that loop. +\stindex{for} +\stindex{while} +\indexii{loop}{statement} + +It terminates the nearest enclosing loop, skipping the optional +else clause if the loop has one. +\kwindex{else} + +If a \keyword{for} loop is terminated by \keyword{break}, the loop control +target keeps its current value. +\indexii{loop control}{target} + +When \keyword{break} passes control out of a \keyword{try} statement +with a finally clause, that finally clause is executed +before really leaving the loop. +\kwindex{finally} + +\section{The {\tt continue} statement} +\stindex{continue} + +\begin{verbatim} +continue_stmt: "continue" +\end{verbatim} + +\keyword{continue} may only occur syntactically nested in a \keyword{for} or +\keyword{while} loop, but not nested in a function or class definition or +\keyword{try} statement within that loop.\footnote{Except that it may +currently occur within an {\tt except} clause.} +\stindex{for} +\stindex{while} +\indexii{loop}{statement} +\kwindex{finally} + +It continues with the next cycle of the nearest enclosing loop. + +\section{The {\tt import} statement} \label{import} +\stindex{import} + +\begin{verbatim} +import_stmt: "import" identifier ("," identifier)* + | "from" identifier "import" identifier ("," identifier)* + | "from" identifier "import" "*" +\end{verbatim} + +Import statements are executed in two steps: (1) find a module, and +initialize it if necessary; (2) define a name or names in the local +name space (of the scope where the \keyword{import} statement occurs). 
+The first form (without \keyword{from}) repeats these steps for each +identifier in the list; the \keyword{from} form performs them once, with +the first identifier specifying the module name. +\indexii{importing}{module} +\indexii{name}{binding} +\kwindex{from} + +The system maintains a table of modules that have been initialized, +indexed by module name. (The current implementation makes this table +accessible as \code{sys.modules}.) When a module name is found in +this table, step (1) is finished. If not, a search for a module +definition is started. This first looks for a built-in module +definition, and if no built-in module of the given name is found, it +searches a user-specified list of directories for a file whose name is +the module name with extension \file{.py}. (The current +implementation uses the list of strings \code{sys.path} as the search +path; it is initialized from the shell environment variable +\envvar{PYTHONPATH}, with an installation-dependent default.) +\ttindex{modules} +\ttindex{sys.modules} +\indexii{module}{name} +\indexii{built-in}{module} +\indexii{user-defined}{module} +\refbimodindex{sys} +\ttindex{path} +\ttindex{sys.path} +\indexii{filename}{extension} + +If a built-in module is found, its built-in initialization code is +executed and step (1) is finished. If no matching file is found, +\exception{ImportError} is raised. If a file is found, it is parsed, +yielding an executable code block. If a syntax error occurs, +\exception{SyntaxError} is raised. Otherwise, an empty module of the given +name is created and inserted in the module table, and then the code +block is executed in the context of this module. Exceptions during +this execution terminate step (1). +\indexii{module}{initialization} +\exindex{SyntaxError} +\exindex{ImportError} +\index{code block} + +When step (1) finishes without raising an exception, step (2) can +begin.
+ +The first form of \keyword{import} statement binds the module name in the +local name space to the module object, and then goes on to import the +next identifier, if any. The \keyword{from} form does not bind the +module name: it goes through the list of identifiers, looks each one +of them up in the module found in step (1), and binds the name in the +local name space to the object thus found. If a name is not found, +\exception{ImportError} is raised. If the list of identifiers is replaced +by a star (\verb@*@), all names defined in the module are bound, +except those beginning with an underscore (\verb@_@). +\indexii{name}{binding} +\exindex{ImportError} + +Names bound by import statements may not occur in \keyword{global} +statements in the same scope. +\stindex{global} + +The \keyword{from} form with \verb@*@ may only occur in a module scope. +\kwindex{from} +\ttindex{from ... import *} + +(The current implementation does not enforce the latter two +restrictions, but programs should not abuse this freedom, as future +implementations may enforce them or silently change the meaning of the +program.) + +\section{The {\tt global} statement} \label{global} +\stindex{global} + +\begin{verbatim} +global_stmt: "global" identifier ("," identifier)* +\end{verbatim} + +The \keyword{global} statement is a declaration which holds for the +entire current code block. It means that the listed identifiers are to be +interpreted as globals. While {\em using} global names is automatic +if they are not defined in the local scope, {\em assigning} to global +names would be impossible without \keyword{global}. +\indexiii{global}{name}{binding} + +Names listed in a \keyword{global} statement must not be used in the same +code block before that \keyword{global} statement is executed.
+ +Names listed in a \keyword{global} statement must not be defined as formal +parameters or in a \keyword{for} loop control target, \keyword{class} +definition, function definition, or \keyword{import} statement. + +(The current implementation does not enforce the latter two +restrictions, but programs should not abuse this freedom, as future +implementations may enforce them or silently change the meaning of the +program.) + +Note: the \keyword{global} is a directive to the parser. Therefore, it +applies only to code parsed at the same time as the \keyword{global} +statement. In particular, a \keyword{global} statement contained in an +\keyword{exec} statement does not affect the code block {\em containing} +the \keyword{exec} statement, and code contained in an \keyword{exec} +statement is unaffected by \keyword{global} statements in the code +containing the \keyword{exec} statement. The same applies to the +\function{eval()}, \function{execfile()} and \function{compile()} functions. +\stindex{exec} +\bifuncindex{eval} +\bifuncindex{execfile} +\bifuncindex{compile} diff --git a/Doc/ref7.tex b/Doc/ref7.tex new file mode 100644 index 0000000..f5b8a0e --- /dev/null +++ b/Doc/ref7.tex @@ -0,0 +1,391 @@ +\chapter{Compound statements} +\indexii{compound}{statement} + +Compound statements contain (groups of) other statements; they affect +or control the execution of those other statements in some way. In +general, compound statements span multiple lines, although in simple +incarnations a whole compound statement may be contained in one line. + +The \verb@if@, \verb@while@ and \verb@for@ statements implement +traditional control flow constructs. \verb@try@ specifies exception +handlers and/or cleanup code for a group of statements. Function and +class definitions are also syntactically compound statements. + +Compound statements consist of one or more `clauses'. A clause +consists of a header and a `suite'. 
The clause headers of a +particular compound statement are all at the same indentation level. +Each clause header begins with a uniquely identifying keyword and ends +with a colon. A suite is a group of statements controlled by a +clause. A suite can be one or more semicolon-separated simple +statements on the same line as the header, following the header's +colon, or it can be one or more indented statements on subsequent +lines. Only the latter form of suite can contain nested compound +statements; the following is illegal, mostly because it wouldn't be +clear to which \verb@if@ clause a following \verb@else@ clause would +belong: +\index{clause} +\index{suite} + +\begin{verbatim} +if test1: if test2: print x +\end{verbatim} + +Also note that the semicolon binds tighter than the colon in this +context, so that in the following example, either all or none of the +\verb@print@ statements are executed: + +\begin{verbatim} +if x < y < z: print x; print y; print z +\end{verbatim} + +Summarizing: + +\begin{verbatim} +compound_stmt: if_stmt | while_stmt | for_stmt + | try_stmt | funcdef | classdef +suite: stmt_list NEWLINE | NEWLINE INDENT statement+ DEDENT +statement: stmt_list NEWLINE | compound_stmt +stmt_list: simple_stmt (";" simple_stmt)* [";"] +\end{verbatim} + +Note that statements always end in a \verb@NEWLINE@ possibly followed +by a \verb@DEDENT@. +\index{NEWLINE token} +\index{DEDENT token} + +Also note that optional continuation clauses always begin with a +keyword that cannot start a statement, thus there are no ambiguities +(the `dangling \verb@else@' problem is solved in Python by requiring +nested \verb@if@ statements to be indented). +\indexii{dangling}{else} + +The formatting of the grammar rules in the following sections places +each clause on a separate line for clarity. 
+ +\section{The {\tt if} statement} +\stindex{if} + +The \verb@if@ statement is used for conditional execution: + +\begin{verbatim} +if_stmt: "if" condition ":" suite + ("elif" condition ":" suite)* + ["else" ":" suite] +\end{verbatim} + +It selects exactly one of the suites by evaluating the conditions one +by one until one is found to be true (see section \ref{Booleans} for +the definition of true and false); then that suite is executed (and no +other part of the \verb@if@ statement is executed or evaluated). If +all conditions are false, the suite of the \verb@else@ clause, if +present, is executed. +\kwindex{elif} +\kwindex{else} + +\section{The {\tt while} statement} +\stindex{while} +\indexii{loop}{statement} + +The \verb@while@ statement is used for repeated execution as long as a +condition is true: + +\begin{verbatim} +while_stmt: "while" condition ":" suite + ["else" ":" suite] +\end{verbatim} + +This repeatedly tests the condition and, if it is true, executes the +first suite; if the condition is false (which may be the first time it +is tested) the suite of the \verb@else@ clause, if present, is +executed and the loop terminates. +\kwindex{else} + +A \verb@break@ statement executed in the first suite terminates the +loop without executing the \verb@else@ clause's suite. A +\verb@continue@ statement executed in the first suite skips the rest +of the suite and goes back to testing the condition. +\stindex{break} +\stindex{continue} + +\section{The {\tt for} statement} +\stindex{for} +\indexii{loop}{statement} + +The \verb@for@ statement is used to iterate over the elements of a +sequence (string, tuple or list): +\obindex{sequence} + +\begin{verbatim} +for_stmt: "for" target_list "in" condition_list ":" suite + ["else" ":" suite] +\end{verbatim} + +The condition list is evaluated once; it should yield a sequence. The +suite is then executed once for each item in the sequence, in the +order of ascending indices. 
Each item in turn is assigned to the +target list using the standard rules for assignments, and then the +suite is executed. When the items are exhausted (which is immediately +when the sequence is empty), the suite in the \verb@else@ clause, if +present, is executed, and the loop terminates. +\kwindex{in} +\kwindex{else} +\indexii{target}{list} + +A \verb@break@ statement executed in the first suite terminates the +loop without executing the \verb@else@ clause's suite. A +\verb@continue@ statement executed in the first suite skips the rest +of the suite and continues with the next item, or with the \verb@else@ +clause if there was no next item. +\stindex{break} +\stindex{continue} + +The suite may assign to the variable(s) in the target list; this does +not affect the next item assigned to it. + +The target list is not deleted when the loop is finished, but if the +sequence is empty, it will not have been assigned to at all by the +loop. + +Hint: the built-in function \verb@range()@ returns a sequence of +integers suitable to emulate the effect of Pascal's +\verb@for i := a to b do@; +e.g. \verb@range(3)@ returns the list \verb@[0, 1, 2]@. +\bifuncindex{range} +\index{Pascal} + +{\bf Warning:} There is a subtlety when the sequence is being modified +by the loop (this can only occur for mutable sequences, i.e. lists). +An internal counter is used to keep track of which item is used next, +and this is incremented on each iteration. When this counter has +reached the length of the sequence the loop terminates. This means that +if the suite deletes the current (or a previous) item from the +sequence, the next item will be skipped (since it gets the index of +the current item which has already been treated). Likewise, if the +suite inserts an item in the sequence before the current item, the +current item will be treated again the next time through the loop. 
+This can lead to nasty bugs that can be avoided by making a temporary +copy using a slice of the whole sequence, e.g. +\index{loop!over mutable sequence} +\index{mutable sequence!loop over} + +\begin{verbatim} +for x in a[:]: + if x < 0: a.remove(x) +\end{verbatim} + +\section{The {\tt try} statement} \label{try} +\stindex{try} + +The \verb@try@ statement specifies exception handlers and/or cleanup +code for a group of statements: + +\begin{verbatim} +try_stmt: try_exc_stmt | try_fin_stmt +try_exc_stmt: "try" ":" suite + ("except" [condition ["," target]] ":" suite)+ + ["else" ":" suite] +try_fin_stmt: "try" ":" suite + "finally" ":" suite +\end{verbatim} + +There are two forms of \verb@try@ statement: \verb@try...except@ and +\verb@try...finally@. These forms cannot be mixed. + +The \verb@try...except@ form specifies one or more exception handlers +(the \verb@except@ clauses). When no exception occurs in the +\verb@try@ clause, no exception handler is executed. When an +exception occurs in the \verb@try@ suite, a search for an exception +handler is started. This inspects the except clauses in turn until +one is found that matches the exception. A condition-less except +clause, if present, must be last; it matches any exception. For an +except clause with a condition, that condition is evaluated, and the +clause matches the exception if the resulting object is ``compatible'' +with the exception. An object is compatible with an exception if it +is either the object that identifies the exception, or (for exceptions +that are classes) it is a base class of the exception, or it is a +tuple containing an item that is compatible with the exception. Note +that the object identities must match, i.e. it must be the same +object, not just an object with the same value. +\kwindex{except} + +If no except clause matches the exception, the search for an exception +handler continues in the surrounding code and on the invocation stack. 
+ +If the evaluation of a condition in the header of an except clause +raises an exception, the original search for a handler is cancelled +and a search starts for the new exception in the surrounding code and +on the call stack (it is treated as if the entire \verb@try@ statement +raised the exception). + +When a matching except clause is found, the exception's parameter is +assigned to the target specified in that except clause, if present, +and the except clause's suite is executed. When the end of this suite +is reached, execution continues normally after the entire try +statement. (This means that if two nested handlers exist for the same +exception, and the exception occurs in the try clause of the inner +handler, the outer handler will not handle the exception.) + +Before an except clause's suite is executed, details about the +exception are assigned to three variables in the \verb@sys@ module: +\verb@sys.exc_type@ receives the object identifying the exception; +\verb@sys.exc_value@ receives the exception's parameter; +\verb@sys.exc_traceback@ receives a traceback object (see section +\ref{traceback}) identifying the point in the program where the +exception occurred. +\refbimodindex{sys} +\ttindex{exc_type} +\ttindex{exc_value} +\ttindex{exc_traceback} +\obindex{traceback} + +The optional \verb@else@ clause is executed when no exception occurs +in the \verb@try@ clause. Exceptions in the \verb@else@ clause are +not handled by the preceding \verb@except@ clauses. +\kwindex{else} + +The \verb@try...finally@ form specifies a `cleanup' handler. The +\verb@try@ clause is executed. When no exception occurs, the +\verb@finally@ clause is executed. When an exception occurs in the +\verb@try@ clause, the exception is temporarily saved, the +\verb@finally@ clause is executed, and then the saved exception is +re-raised. 
If the \verb@finally@ clause raises another exception or +executes a \verb@return@, \verb@break@ or \verb@continue@ statement, +the saved exception is lost. +\kwindex{finally} + +When a \verb@return@ or \verb@break@ statement is executed in the +\verb@try@ suite of a \verb@try...finally@ statement, the +\verb@finally@ clause is also executed `on the way out'. A +\verb@continue@ statement is illegal in the \verb@try@ clause. (The +reason is a problem with the current implementation --- this +restriction may be lifted in the future.) +\stindex{return} +\stindex{break} +\stindex{continue} + +\section{Function definitions} \label{function} +\indexii{function}{definition} + +A function definition defines a user-defined function object (see +section \ref{types}):\footnote{The new syntax to receive arbitrary +keyword arguments is not yet documented in this manual. See chapter +12 of the Tutorial.} +\obindex{user-defined function} +\obindex{function} + +\begin{verbatim} +funcdef: "def" funcname "(" [parameter_list] ")" ":" suite +parameter_list: (defparameter ",")* ("*" identifier ["," "**" identifier] + | "**" identifier + | defparameter [","]) +defparameter: parameter ["=" condition] +sublist: parameter ("," parameter)* [","] +parameter: identifier | "(" sublist ")" +funcname: identifier +\end{verbatim} + +A function definition is an executable statement. Its execution binds +the function name in the current local name space to a function object +(a wrapper around the executable code for the function). This +function object contains a reference to the current global name space +as the global name space to be used when the function is called. +\indexii{function}{name} +\indexii{name}{binding} + +The function definition does not execute the function body; this gets +executed only when the function is called. + +When one or more top-level parameters have the form {\em parameter = +condition}, the function is said to have ``default parameter values''.
+Default parameter values are evaluated when the function definition is +executed. For a parameter with a default value, the corresponding +argument may be omitted from a call, in which case the parameter's +default value is substituted. If a parameter has a default value, all +following parameters must also have a default value --- this is a +syntactic restriction that is not expressed by the grammar.% +\footnote{Currently this is not checked; instead, +{\tt def f(a=1,b)} is interpreted as {\tt def f(a=1,b=None)}.} +\indexiii{default}{parameter}{value} + +Function call semantics are described in section \ref{calls}. When a +user-defined function is called, first missing arguments for which a +default value exists are supplied; then the arguments (a.k.a. actual +parameters) are bound to the (formal) parameters, as follows: +\indexii{function}{call} +\indexiii{user-defined}{function}{call} +\index{parameter} +\index{argument} +\indexii{parameter}{formal} +\indexii{parameter}{actual} + +\begin{itemize} + +\item +If there are no formal parameters, there must be no arguments. + +\item +If the formal parameter list does not end in a star followed by an +identifier, there must be exactly as many arguments as there are +parameters in the formal parameter list (at the top level); the +arguments are assigned to the formal parameters one by one. Note that +the presence or absence of a trailing comma at the top level in either +the formal or the actual parameter list makes no difference. The +assignment to a formal parameter is performed as if the parameter +occurs on the left hand side of an assignment statement whose right +hand side's value is that of the argument. + +\item +If the formal parameter list ends in a star followed by an identifier, +preceded by zero or more comma-followed parameters, there must be at +least as many arguments as there are parameters preceding the star. +Call this number {\em N}.
The first {\em N} arguments are assigned to +the corresponding formal parameters in the way described above. A +tuple containing the remaining arguments, if any, is then assigned to +the identifier following the star. This variable will always be a +tuple: if there are no extra arguments, its value is \verb@()@, if +there is just one extra argument, it is a singleton tuple. +\indexii{variable length}{parameter list} + +\end{itemize} + +Note that the `variable length parameter list' feature only works at +the top level of the parameter list; individual parameters use a model +corresponding more closely to that of ordinary assignment. While the +latter model is generally preferable, because of the greater type +safety it offers (wrong-sized tuples aren't silently mistreated), +variable length parameter lists are a sufficiently accepted practice +in most programming languages that a compromise has been worked out. +(And anyway, assignment has no equivalent for empty argument lists.) + +It is also possible to create anonymous functions (functions not bound +to a name), for immediate use in expressions. This uses lambda forms, +described in section \ref{lambda}. +\indexii{lambda}{form} + +\section{Class definitions} \label{class} +\indexii{class}{definition} + +A class definition defines a class object (see section \ref{types}): +\obindex{class} + +\begin{verbatim} +classdef: "class" classname [inheritance] ":" suite +inheritance: "(" [condition_list] ")" +classname: identifier +\end{verbatim} + +A class definition is an executable statement. It first evaluates the +inheritance list, if present. Each item in the inheritance list +should evaluate to a class object. The class's suite is then executed +in a new execution frame (see section \ref{execframes}), using a newly +created local name space and the original global name space. +(Usually, the suite contains only function definitions.)
When the +class's suite finishes execution, its execution frame is discarded but +its local name space is saved. A class object is then created using +the inheritance list for the base classes and the saved local name +space for the attribute dictionary. The class name is bound to this +class object in the original local name space. +\index{inheritance} +\indexii{class}{name} +\indexii{name}{binding} +\indexii{execution}{frame} diff --git a/Doc/ref8.tex b/Doc/ref8.tex new file mode 100644 index 0000000..a678f9f --- /dev/null +++ b/Doc/ref8.tex @@ -0,0 +1,105 @@ +\chapter{Top-level components} + +The Python interpreter can get its input from a number of sources: +from a script passed to it as standard input or as program argument, +typed in interactively, from a module source file, etc. This chapter +gives the syntax used in these cases. +\index{interpreter} + +\section{Complete Python programs} +\index{program} + +While a language specification need not prescribe how the language +interpreter is invoked, it is useful to have a notion of a complete +Python program. A complete Python program is executed in a minimally +initialized environment: all built-in and standard modules are +available, but none have been initialized, except for \verb@sys@ +(various system services), \verb@__builtin__@ (built-in functions, +exceptions and \verb@None@) and \verb@__main__@. The latter is used +to provide the local and global name space for execution of the +complete program. +\refbimodindex{sys} +\refbimodindex{__main__} +\refbimodindex{__builtin__} + +The syntax for a complete Python program is that for file input, +described in the next section. + +The interpreter may also be invoked in interactive mode; in this case, +it does not read and execute a complete program but reads and executes +one statement (possibly compound) at a time. The initial environment +is identical to that of a complete program; each statement is executed +in the name space of \verb@__main__@. 
+\index{interactive mode} +\refbimodindex{__main__} + +Under {\UNIX}, a complete program can be passed to the interpreter in +three forms: with the {\bf -c} {\it string} command line option, as a +file passed as the first command line argument, or as standard input. +If the file or standard input is a tty device, the interpreter enters +interactive mode; otherwise, it executes the file as a complete +program. +\index{UNIX} +\index{command line} +\index{standard input} + +\section{File input} + +All input read from non-interactive files has the same form: + +\begin{verbatim} +file_input: (NEWLINE | statement)* +\end{verbatim} + +This syntax is used in the following situations: + +\begin{itemize} + +\item when parsing a complete Python program (from a file or from a string); + +\item when parsing a module; + +\item when parsing a string passed to the \verb@exec@ statement; + +\end{itemize} + +\section{Interactive input} + +Input in interactive mode is parsed using the following grammar: + +\begin{verbatim} +interactive_input: [stmt_list] NEWLINE | compound_stmt NEWLINE +\end{verbatim} + +Note that a (top-level) compound statement must be followed by a blank +line in interactive mode; this is needed to help the parser detect the +end of the input. + +\section{Expression input} +\index{input} + +There are two forms of expression input. Both ignore leading +whitespace. + +The string argument to \verb@eval()@ must have the following form: +\bifuncindex{eval} + +\begin{verbatim} +eval_input: condition_list NEWLINE* +\end{verbatim} + +The input line read by \verb@input()@ must have the following form: +\bifuncindex{input} + +\begin{verbatim} +input_input: condition_list NEWLINE +\end{verbatim} + +Note: to read `raw' input line without interpretation, you can use the +built-in function \verb@raw_input()@ or the \verb@readline()@ method +of file objects. +\obindex{file} +\index{input!raw} +\index{raw input} +\bifuncindex{raw_input} +\ttindex{readline} -- cgit v0.12