From 8675115e5f55e69fdb30ebba95e7a6a5216e133c Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 28 Feb 1995 17:14:32 +0000 Subject: a few typographical changes (e.g. -- => ---) and lots of new stuff in the WWW chapter --- Doc/lib.tex | 6 +- Doc/lib/lib.tex | 6 +- Doc/lib/libcgi.tex | 4 +- Doc/lib/libftplib.tex | 3 + Doc/lib/libfuncs.tex | 2 +- Doc/lib/libgopherlib.tex | 3 + Doc/lib/libhtmllib.tex | 270 +++++++++++++++++++++++++++++++++++++++++++++- Doc/lib/libhttplib.tex | 2 + Doc/lib/libimp.tex | 6 +- Doc/lib/libmimetools.tex | 3 + Doc/lib/libnntplib.tex | 3 + Doc/lib/librfc822.tex | 2 + Doc/lib/libsgmllib.tex | 147 ++++++++++++++++++++++++- Doc/lib/libsocket.tex | 12 +-- Doc/lib/libstring.tex | 6 +- Doc/lib/liburllib.tex | 6 +- Doc/lib/liburlparse.tex | 2 + Doc/libcgi.tex | 4 +- Doc/libftplib.tex | 3 + Doc/libfuncs.tex | 2 +- Doc/libgopherlib.tex | 3 + Doc/libhtmllib.tex | 270 +++++++++++++++++++++++++++++++++++++++++++++- Doc/libhttplib.tex | 2 + Doc/libimp.tex | 6 +- Doc/libmac.tex | 6 +- Doc/libmacconsole.tex | 2 + Doc/libmacfs.tex | 2 + Doc/libmacspeech.tex | 2 + Doc/libmimetools.tex | 3 + Doc/libnntplib.tex | 3 + Doc/librfc822.tex | 2 + Doc/libsgmllib.tex | 147 ++++++++++++++++++++++++- Doc/libsocket.tex | 12 +-- Doc/libstring.tex | 6 +- Doc/liburllib.tex | 6 +- Doc/liburlparse.tex | 2 + Doc/mac/libmac.tex | 6 +- Doc/mac/libmacconsole.tex | 2 + Doc/mac/libmacfs.tex | 2 + Doc/mac/libmacspeech.tex | 2 + Doc/ref/ref3.tex | 6 +- Doc/ref/ref4.tex | 2 +- Doc/ref3.tex | 6 +- Doc/ref4.tex | 2 +- Doc/tut.tex | 12 +-- Doc/tut/tut.tex | 12 +-- 46 files changed, 958 insertions(+), 60 deletions(-) diff --git a/Doc/lib.tex b/Doc/lib.tex index 07f70cf..873e8fb 100644 --- a/Doc/lib.tex +++ b/Doc/lib.tex @@ -1,5 +1,9 @@ \documentstyle[twoside,11pt,myformat]{report} +% NOTE: this file controls which chapters/sections of the library +% manual are actually printed. It is easy to customize your manual +% by commenting out sections that you're not interested in. 
+ \title{Python Library Reference} \input{boilerplate} @@ -112,7 +116,7 @@ language. %\input{libamoeba} % AMOEBA ONLY -%\input{libmac} % MACINTOSH ONLY +\input{libmac} % MACINTOSH ONLY \input{libstdwin} % STDWIN ONLY diff --git a/Doc/lib/lib.tex b/Doc/lib/lib.tex index 07f70cf..873e8fb 100644 --- a/Doc/lib/lib.tex +++ b/Doc/lib/lib.tex @@ -1,5 +1,9 @@ \documentstyle[twoside,11pt,myformat]{report} +% NOTE: this file controls which chapters/sections of the library +% manual are actually printed. It is easy to customize your manual +% by commenting out sections that you're not interested in. + \title{Python Library Reference} \input{boilerplate} @@ -112,7 +116,7 @@ language. %\input{libamoeba} % AMOEBA ONLY -%\input{libmac} % MACINTOSH ONLY +\input{libmac} % MACINTOSH ONLY \input{libstdwin} % STDWIN ONLY diff --git a/Doc/lib/libcgi.tex b/Doc/lib/libcgi.tex index 9d27644..a5d1cdf 100644 --- a/Doc/lib/libcgi.tex +++ b/Doc/lib/libcgi.tex @@ -6,6 +6,8 @@ \indexii{MIME}{headers} \index{URL} +\renewcommand{\indexsubitem}{(in module cgi)} + This module makes it easy to write Python scripts that run in a WWW server using the Common Gateway Interface. It was written by Michael McLay and subsequently modified by Steve Majewski and Guido van @@ -113,7 +115,7 @@ if it is unique, or raise \code{IndexError} if the field was specified more than once in the form. (If the field wasn't specified at all, \code{KeyError} is raised.) To access fields that are specified multiple times, use \code{form.getlist(fieldname)}. The -\code{values()} and \code{items()} methods return mixed lists -- +\code{values()} and \code{items()} methods return mixed lists --- containing strings for singly-defined fields, and lists of strings for multiply-defined fields. 
\end{funcdesc} diff --git a/Doc/lib/libftplib.tex b/Doc/lib/libftplib.tex index 105ccdf..acd8784 100644 --- a/Doc/lib/libftplib.tex +++ b/Doc/lib/libftplib.tex @@ -1,3 +1,6 @@ \section{Built-in module \sectcode{ftplib}} \stmodindex{ftplib} + +\renewcommand{\indexsubitem}{(in module ftplib)} + To be provided. diff --git a/Doc/lib/libfuncs.tex b/Doc/lib/libfuncs.tex index ed2427b..91a9ec9 100644 --- a/Doc/lib/libfuncs.tex +++ b/Doc/lib/libfuncs.tex @@ -120,7 +120,7 @@ exactly one argument.) This function is similar to the \code{eval()} function or the \code{exec} statement, but parses a file instead of a string. It is different from the \code{import} statement in that it does not use - the module administration -- it reads the file unconditionally and + the module administration --- it reads the file unconditionally and does not create a new module. The arguments are a file name and two optional dictionaries. The diff --git a/Doc/lib/libgopherlib.tex b/Doc/lib/libgopherlib.tex index 9b81e37..904a64b 100644 --- a/Doc/lib/libgopherlib.tex +++ b/Doc/lib/libgopherlib.tex @@ -1,3 +1,6 @@ \section{Built-in module \sectcode{gopherlib}} \stmodindex{gopherlib} + +\renewcommand{\indexsubitem}{(in module gopherlib)} + To be provided. diff --git a/Doc/lib/libhtmllib.tex b/Doc/lib/libhtmllib.tex index 9ea10ee..e192774 100644 --- a/Doc/lib/libhtmllib.tex +++ b/Doc/lib/libhtmllib.tex @@ -1,3 +1,271 @@ \section{Built-in module \sectcode{htmllib}} \stmodindex{htmllib} -To be provided. +\index{HTML} +\index{hypertext} + +\renewcommand{\indexsubitem}{(in module htmllib)} + +This module defines a number of classes which can serve as a basis for +parsing text files formatted in HTML (HyperText Mark-up Language). +The classes are not directly concerned with I/O --- they have to be fed +their input in string form, and will make calls to methods of a +``formatter'' object in order to produce output. 
The classes are +designed to be used as base classes for other classes in order to add +functionality, and allow most of their methods to be extended or +overridden. In turn, the classes are derived from and extend the +class \code{SGMLParser} defined in module \code{sgmllib}. +\index{SGML} +\stmodindex{sgmllib} +\ttindex{SGMLParser} +\index{formatter} + +The following is a summary of the interface defined by +\code{sgmllib.SGMLParser}: + +\begin{itemize} + +\item +The interface to feed data to an instance is through the \code{feed()} +method, which takes a string argument. This can be called with as +little or as much text at a time. When the data contains complete +HTML elements, these are processed immediately; incomplete elements +are saved in a buffer. To force processing of all unprocessed data, +call the \code{close()} method. + +Example: to parse the entire contents of a file, do +\code{parser.feed(open(file).read()); parser.close()}. + +\item +The interface to define semantics for HTML tags is very simple: derive +a class and define methods called \code{start_\var{tag}()}, +\code{end_\var{tag}()}, or \code{do_\var{tag}()}. The parser will +call these at appropriate moments: \code{start_\var{tag}} or +\code{do_\var{tag}} is called when an opening tag of the form +\code{<\var{tag} ...>} is encountered; \code{end_\var{tag}} is called +when a closing tag of the form \code{</\var{tag}>} is encountered. If +an opening tag requires a corresponding closing tag, like \code{<H1>} +... \code{</H1>}, the class should define the \code{start_\var{tag}} +method; if a tag requires no closing tag, like \code{<P>}, the class +should define the \code{do_\var{tag}} method. + +\end{itemize} + +The module defines the following classes: + +\begin{funcdesc}{HTMLParser}{} +This is the most basic HTML parser class. It defines one additional +entity name over the names defined by the \code{SGMLParser} base +class, \code{\&bullet;}. It also defines handlers for the following +tags: \code{<LISTING>...</LISTING>}, \code{<XMP>...</XMP>}, and +\code{<PLAINTEXT>} (the latter is terminated only by end of file). +\end{funcdesc} + +\begin{funcdesc}{CollectingParser}{} +This class, derived from \code{HTMLParser}, collects various useful +bits of information from the HTML text. To this end it defines +additional handlers for the following tags: \code{<A>...</A>}, +\code{<HEAD>...</HEAD>}, \code{<BODY>...</BODY>}, +\code{<TITLE>...</TITLE>}, \code{<NEXTID>}, and \code{<ISINDEX>}. +\end{funcdesc} + +\begin{funcdesc}{FormattingParser}{formatter\, stylesheet} +This class, derived from \code{CollectingParser}, interprets a wide +selection of HTML tags so it can produce formatted output from the +parsed data. It is initialized with two objects, a \var{formatter} +which should define a number of methods to format text into +paragraphs, and a \var{stylesheet} which defines a number of static +parameters for the formatting process. Formatters and style sheets +are documented later in this section. +\index{formatter} +\index{style sheet} +\end{funcdesc} + +\begin{funcdesc}{AnchoringParser}{formatter\, stylesheet} +This class, derived from \code{FormattingParser}, extends the handling +of the \code{<A>...</A>} tag pair to call the formatter's +\code{bgn_anchor()} and \code{end_anchor()} methods. This allows the +formatter to display the anchor in a different font or color, etc. +\end{funcdesc} + +Instances of \code{CollectingParser} (and thus also instances of +\code{FormattingParser} and \code{AnchoringParser}) have the following +instance variables: + +\begin{datadesc}{anchornames} +A list of the values of the \code{NAME} attributes of the \code{<A>} +tags encountered. +\end{datadesc} + +\begin{datadesc}{anchors} +A list of the values of \code{HREF} attributes of the \code{<A>} tags +encountered. +\end{datadesc} + +\begin{datadesc}{anchortypes} +A list of the values of the \code{TYPE} attributes of the \code{<A>} +tags encountered. 
+\end{datadesc} + +\begin{datadesc}{inanchor} +Outside an \code{<A>...</A>} tag pair, this is zero. Inside such a +pair, it is a unique integer, which is positive if the anchor has a +\code{HREF} attribute, negative if it hasn't. Its absolute value is +one more than the index of the anchor in the \code{anchors}, +\code{anchornames} and \code{anchortypes} lists. +\end{datadesc} + +\begin{datadesc}{isindex} +True if the \code{<ISINDEX>} tag has been encountered. +\end{datadesc} + +\begin{datadesc}{nextid} +The attribute list of the last \code{<NEXTID>} tag encountered, or +an empty list if none. +\end{datadesc} + +\begin{datadesc}{title} +The text inside the last \code{<TITLE>...</TITLE>} tag pair, or +\code{''} if no title has been encountered yet. +\end{datadesc} + +The \code{anchors}, \code{anchornames} and \code{anchortypes} lists +are ``parallel arrays'': items in these lists with the same index +pertain to the same anchor. Missing attributes default to the empty +string. Anchors with neither a \code{HREF} nor a \code{NAME} +attribute are not entered in these lists at all. + +The module also defines a number of style sheet classes. These should +never be instantiated --- their class variables are the only behaviour +required. Note that style sheets are specifically designed for a +particular formatter implementation. The currently defined style +sheets are: +\index{style sheet} + +\begin{datadesc}{NullStylesheet} +A style sheet for use on a dumb output device such as an ASCII +terminal. +\end{datadesc} + +\begin{datadesc}{X11Stylesheet} +A style sheet for use with an X11 server. +\end{datadesc} + +\begin{datadesc}{MacStylesheet} +A style sheet for use on Apple Macintosh computers. +\end{datadesc} + +\begin{datadesc}{StdwinStylesheet} +A style sheet for use with the \code{stdwin} module; it is an alias +for either \code{X11Stylesheet} or \code{MacStylesheet}. 
+\bimodindex{stdwin} +\end{datadesc} + +\begin{datadesc}{GLStylesheet} +A style sheet for use with the SGI Graphics Library and its font +manager (the SGI-specific built-in modules \code{gl} and \code{fm}). +\bimodindex{gl} +\bimodindex{fm} +\end{datadesc} + +Style sheets have the following class variables: + +\begin{datadesc}{stdfontset} +A list of up to four font definitions, respectively for the roman, +italic, bold and constant-width variant of a font for normal text. If +the list contains fewer than four font definitions, the last item is +used as the default for missing items. The type of a font definition +depends on the formatter in use; its only use is as a parameter to the +formatter's \code{setfont()} method. +\end{datadesc} + +\begin{datadesc}{h1fontset} +\dataline{h2fontset} +\dataline{h3fontset} +The font set used for various headers (text inside \code{<H1>...</H1>} +tag pairs etc.). +\end{datadesc} + +\begin{datadesc}{stdindent} +The indentation of normal text. This is measured in the ``native'' +units of the formatter in use; for some formatters these are +characters, for others (especially those that actually support +variable-spacing fonts) in pixels or printer points. +\end{datadesc} + +\begin{datadesc}{ddindent} +The indentation used for the first level of \code{<DD>} tags. +\end{datadesc} + +\begin{datadesc}{ulindent} +The indentation used for the first level of \code{<UL>} tags. +\end{datadesc} + +\begin{datadesc}{h1indent} +The indentation used for level 1 headers. +\end{datadesc} + +\begin{datadesc}{h2indent} +The indentation used for level 2 headers. +\end{datadesc} + +\begin{datadesc}{literalindent} +The indentation used for literal text (text inside +\code{<PRE>...</PRE>} and similar tag pairs). +\end{datadesc} + +Although no documented implementation of a formatter exists, the +\code{FormattingParser} class assumes that formatters have a +certain interface. 
This interface requires the following methods: +\index{formatter} + +\begin{funcdesc}{setfont}{fontspec} +Set the font to be used subsequently. The \var{fontspec} argument is +an item in a style sheet's font set. +\end{funcdesc} + +\begin{funcdesc}{flush}{} +Finish the current line, if not empty, and begin a new one. +\end{funcdesc} + +\begin{funcdesc}{setleftindent}{n} +Set the left indentation of the following lines to \var{n} units. +\end{funcdesc} + +\begin{funcdesc}{needvspace}{n} +Require at least \var{n} blank lines before the next line. Implies +\code{flush()}. +\end{funcdesc} + +\begin{funcdesc}{addword}{word\, space} +Add a \var{word} to the current paragraph, followed by \var{space} +spaces. +\end{funcdesc} + +\begin{datadesc}{nospace} +If this instance variable is true, empty words are ignored by +\code{addword}. It is set to false after a non-empty word has been +added. +\end{datadesc} + +\begin{funcdesc}{setjust}{justification} +Set the justification of the current paragraph. The +\var{justification} can be \code{'c'} (center), \code{'l'} (left +justified), \code{'r'} (right justified) or \code{'lr'} (left and +right justified). +\end{funcdesc} + +\begin{funcdesc}{bgn_anchor}{id} +Begin an anchor. The \var{id} parameter is the value of the parser's +\code{inanchor} attribute. +\end{funcdesc} + +\begin{funcdesc}{end_anchor}{id} +End an anchor. The \var{id} parameter is the value of the parser's +\code{inanchor} attribute. +\end{funcdesc} + +A sample formatter implementation can be found in the module +\code{fmt}, which in turn uses the module \code{Para}. These are +currently not intended as a +\ttindex{fmt} +\ttindex{Para} diff --git a/Doc/lib/libhttplib.tex b/Doc/lib/libhttplib.tex index a284faa..e36bba4 100644 --- a/Doc/lib/libhttplib.tex +++ b/Doc/lib/libhttplib.tex @@ -2,6 +2,8 @@ \stmodindex{httplib} \index{HTTP} +\renewcommand{\indexsubitem}{(in module httplib)} + This module defines a class which implements the client side of the HTTP protocol. 
It is normally not used directly --- the module \code{urlllib} module uses it to handle URLs that use HTTP. diff --git a/Doc/lib/libimp.tex b/Doc/lib/libimp.tex index 1a313fa..befde61 100644 --- a/Doc/lib/libimp.tex +++ b/Doc/lib/libimp.tex @@ -38,7 +38,7 @@ returned by \code{get_suffixes} describing the kind of file found. \begin{funcdesc}{init_builtin}{name} Initialize the built-in module called \var{name} and return its module object. If the module was already initialized, it will be initialized -{\em again}. A few modules cannot be initialized twice -- attempting +{\em again}. A few modules cannot be initialized twice --- attempting to initialize these again will raise an exception. If there is no built-in module called \var{name}, \code{None} is returned. \end{funcdesc} @@ -73,7 +73,7 @@ it will be initialized {\em again}. The \var{name} argument is used to create or access a module object. The \var{pathname} argument points to the byte-compiled code file. The optional \var{file} argument is the byte-compiled code file, open for reading in binary -mode, from the beginning -- if not given, the function opens +mode, from the beginning --- if not given, the function opens \var{pathname}. It must currently be a real file object, not a user-defined class emulating a file. \end{funcdesc} @@ -97,7 +97,7 @@ return its module object. If the module was already initialized, it will be initialized {\em again}. The \var{name} argument is used to create or access a module object. The \var{pathname} argument points to the source file. The optional \var{file} argument is the source -file, open for reading as text, from the beginning -- if not given, +file, open for reading as text, from the beginning --- if not given, the function opens \var{pathname}. It must currently be a real file object, not a user-defined class emulating a file. 
Note that if a properly matching byte-compiled file (with suffix \code{.pyc}) exists, diff --git a/Doc/lib/libmimetools.tex b/Doc/lib/libmimetools.tex index c32224b..0d1a81b 100644 --- a/Doc/lib/libmimetools.tex +++ b/Doc/lib/libmimetools.tex @@ -1,3 +1,6 @@ \section{Built-in module \sectcode{mimetools}} \stmodindex{mimetools} + +\renewcommand{\indexsubitem}{(in module mimetools)} + To be provided. diff --git a/Doc/lib/libnntplib.tex b/Doc/lib/libnntplib.tex index 93e7ed1..6aac671 100644 --- a/Doc/lib/libnntplib.tex +++ b/Doc/lib/libnntplib.tex @@ -1,3 +1,6 @@ \section{Built-in module \sectcode{nntplib}} \stmodindex{nntplib} + +\renewcommand{\indexsubitem}{(in module nntplib)} + To be provided. diff --git a/Doc/lib/librfc822.tex b/Doc/lib/librfc822.tex index 43a5cea..641ea85 100644 --- a/Doc/lib/librfc822.tex +++ b/Doc/lib/librfc822.tex @@ -1,6 +1,8 @@ \section{Built-in module \sectcode{rfc822}} \stmodindex{rfc822} +\renewcommand{\indexsubitem}{(in module rfc822)} + This module defines a class, \code{Message}, which represents a collection of ``email headers'' as defined by the Internet standard RFC 822. It is used in various contexts, usually to read such headers diff --git a/Doc/lib/libsgmllib.tex b/Doc/lib/libsgmllib.tex index 03d9ba2..29e26c2 100644 --- a/Doc/lib/libsgmllib.tex +++ b/Doc/lib/libsgmllib.tex @@ -1,3 +1,148 @@ \section{Built-in module \sectcode{sgmllib}} \stmodindex{sgmllib} -To be provided. +\index{SGML} + +\renewcommand{\indexsubitem}{(in module sgmllib)} + +This module defines a class \code{SGMLParser} which serves as the +basis for parsing text files formatted in SGML (Standard Generalized +Mark-up Language). In fact, it does not provide a full SGML parser +--- it only parses SGML insofar as it is used by HTML, and the module only +exists as a basis for the \code{htmllib} module. 
+\stmodindex{htmllib} + +In particular, the parser is hardcoded to recognize the following +elements: + +\begin{itemize} + +\item +Opening and closing tags of the form +``\code{<\var{tag} \var{attr}="\var{value}" ...>}'' and +``\code{</\var{tag}>}'', respectively. + +\item +Character references of the form ``\code{\&\#\var{name};}''. + +\item +Entity references of the form ``\code{\&\var{name};}''. + +\item +SGML comments of the form ``\code{<!--\var{text}-->}''. + +\end{itemize} + +The \code{SGMLParser} class must be instantiated without arguments. +It has the following interface methods: + +\begin{funcdesc}{reset}{} +Reset the instance. Loses all unprocessed data. This is called +implicitly at instantiation time. +\end{funcdesc} + +\begin{funcdesc}{setnomoretags}{} +Stop processing tags. Treat all following input as literal input +(CDATA). (This is only provided so the HTML tag \code{<PLAINTEXT>} +can be implemented.) +\end{funcdesc} + +\begin{funcdesc}{setliteral}{} +Enter literal mode (CDATA mode). +\end{funcdesc} + +\begin{funcdesc}{feed}{data} +Feed some text to the parser. It is processed insofar as it consists +of complete elements; incomplete data is buffered until more data is +fed or \code{close()} is called. +\end{funcdesc} + +\begin{funcdesc}{close}{} +Force processing of all buffered data as if it were followed by an +end-of-file mark. This method may be redefined by a derived class to +define additional processing at the end of the input, but the +redefined version should always call \code{SGMLParser.close()}. +\end{funcdesc} + +\begin{funcdesc}{handle_charref}{ref} +This method is called to process a character reference of the form +``\code{\&\#\var{ref};}'' where \var{ref} is a decimal number in the +range 0--255. It translates the character to ASCII and calls the +method \code{handle_data()} with the character as argument. If +\var{ref} is invalid or out of range, the method +\code{unknown_charref(\var{ref})} is called instead. 
+\end{funcdesc} + +\begin{funcdesc}{handle_entityref}{ref} +This method is called to process an entity reference of the form +``\code{\&\var{ref};}'' where \var{ref} is an alphabetic entity +reference. It looks for \var{ref} in the instance (or class) +variable \code{entitydefs} which should give the entity's translation. +If a translation is found, it calls the method \code{handle_data()} +with the translation; otherwise, it calls the method +\code{unknown_entityref(\var{ref})}. +\end{funcdesc} + +\begin{funcdesc}{handle_data}{data} +This method is called to process arbitrary data. It is intended to be +overridden by a derived class; the base class implementation does +nothing. +\end{funcdesc} + +\begin{funcdesc}{unknown_starttag}{tag\, attributes} +This method is called to process an unknown start tag. It is intended +to be overridden by a derived class; the base class implementation +does nothing. The \var{attributes} argument is a list of +(\var{name}, \var{value}) pairs containing the attributes found inside +the tag's \code{<>} brackets. The \var{name} has been translated to +lower case and double quotes and backslashes in the \var{value} have +been interpreted. For instance, for the tag +\code{<A HREF="http://www.cwi.nl/">}, this method would be +called as \code{unknown_starttag('a', [('href', 'http://www.cwi.nl/')])}. +\end{funcdesc} + +\begin{funcdesc}{unknown_endtag}{tag} +This method is called to process an unknown end tag. It is intended +to be overridden by a derived class; the base class implementation +does nothing. +\end{funcdesc} + +\begin{funcdesc}{unknown_charref}{ref} +This method is called to process an unknown character reference. It +is intended to be overridden by a derived class; the base class +implementation does nothing. +\end{funcdesc} + +\begin{funcdesc}{unknown_entityref}{ref} +This method is called to process an unknown entity reference. 
It is +intended to be overridden by a derived class; the base class +implementation does nothing. +\end{funcdesc} + +Apart from overriding or extending the methods listed above, derived +classes may also define methods of the following form to define +processing of specific tags. Tag names in the input stream are case +independent; the \var{tag} occurring in method names must be in lower +case: + +\begin{funcdesc}{start_\var{tag}}{attributes} +This method is called to process an opening tag \var{tag}. It has +preference over \code{do_\var{tag}()}. The \var{attributes} argument +has the same meaning as described for \code{unknown_starttag()} above. +\end{funcdesc} + +\begin{funcdesc}{do_\var{tag}}{attributes} +This method is called to process an opening tag \var{tag} that does +not come with a matching closing tag. The \var{attributes} argument +has the same meaning as described for \code{unknown_starttag()} above. +\end{funcdesc} + +\begin{funcdesc}{end_\var{tag}}{} +This method is called to process a closing tag \var{tag}. +\end{funcdesc} + +Note that the parser maintains a stack of opening tags for which no +matching closing tag has been found yet. Only tags processed by +\code{start_\var{tag}()} are pushed on this stack. Definition of an +\code{end_\var{tag}()} method is optional for these tags. For tags +processed by \code{do_\var{tag}()} or by \code{unknown_starttag()}, no +\code{end_\var{tag}()} method must be defined. diff --git a/Doc/lib/libsocket.tex b/Doc/lib/libsocket.tex index 17fb314..25658d9 100644 --- a/Doc/lib/libsocket.tex +++ b/Doc/lib/libsocket.tex @@ -152,7 +152,7 @@ to the socket on the other end of the connection. \begin{funcdesc}{bind}{address} Bind the socket to \var{address}. The socket must not already be bound. -(The format of \var{address} depends on the address family -- see above.) +(The format of \var{address} depends on the address family --- see above.) 
\end{funcdesc} \begin{funcdesc}{close}{} @@ -163,7 +163,7 @@ Sockets are automatically closed when they are garbage-collected. \begin{funcdesc}{connect}{address} Connect to a remote socket at \var{address}. -(The format of \var{address} depends on the address family -- see above.) +(The format of \var{address} depends on the address family --- see above.) \end{funcdesc} \begin{funcdesc}{fileno}{} @@ -174,14 +174,14 @@ with \code{select}. \begin{funcdesc}{getpeername}{} Return the remote address to which the socket is connected. This is useful to find out the port number of a remote IP socket, for instance. -(The format of the address returned depends on the address family -- +(The format of the address returned depends on the address family --- see above.) On some systems this function is not supported. \end{funcdesc} \begin{funcdesc}{getsockname}{} Return the socket's own address. This is useful to find out the port number of an IP socket, for instance. -(The format of the address returned depends on the address family -- +(The format of the address returned depends on the address family --- see above.) \end{funcdesc} @@ -224,7 +224,7 @@ Receive data from the socket. The return value is a pair \code{(\var{string}, \var{address})} where \var{string} is a string representing the data received and \var{address} is the address of the socket sending the data. -(The format of \var{address} depends on the address family -- see above.) +(The format of \var{address} depends on the address family --- see above.) \end{funcdesc} \begin{funcdesc}{send}{string} @@ -236,7 +236,7 @@ socket. Return the number of bytes sent. Send data to the socket. The socket should not be connected to a remote socket, since the destination socket is specified by \code{address}. Return the number of bytes sent. -(The format of \var{address} depends on the address family -- see above.) +(The format of \var{address} depends on the address family --- see above.) 
\end{funcdesc} \begin{funcdesc}{setblocking}{flag} diff --git a/Doc/lib/libstring.tex b/Doc/lib/libstring.tex index 1c4e90d..7628a10 100644 --- a/Doc/lib/libstring.tex +++ b/Doc/lib/libstring.tex @@ -23,7 +23,7 @@ The constants are: \begin{datadesc}{lowercase} A string containing all the characters that are considered lowercase letters. On most systems this is the string - \code{'abcdefghijklmnopqrstuvwxyz'}. Do not change its definition -- + \code{'abcdefghijklmnopqrstuvwxyz'}. Do not change its definition --- the effect on the routines \code{upper} and \code{swapcase} is undefined. \end{datadesc} @@ -35,7 +35,7 @@ The constants are: \begin{datadesc}{uppercase} A string containing all the characters that are considered uppercase letters. On most systems this is the string - \code{'ABCDEFGHIJKLMNOPQRSTUVWXYZ'}. Do not change its definition -- + \code{'ABCDEFGHIJKLMNOPQRSTUVWXYZ'}. Do not change its definition --- the effect on the routines \code{lower} and \code{swapcase} is undefined. \end{datadesc} @@ -43,7 +43,7 @@ The constants are: \begin{datadesc}{whitespace} A string containing all characters that are considered whitespace. On most systems this includes the characters space, tab, linefeed, - return, formfeed, and vertical tab. Do not change its definition -- + return, formfeed, and vertical tab. Do not change its definition --- the effect on the routines \code{strip} and \code{split} is undefined. \end{datadesc} diff --git a/Doc/lib/liburllib.tex b/Doc/lib/liburllib.tex index 689a5c9..a6000a7 100644 --- a/Doc/lib/liburllib.tex +++ b/Doc/lib/liburllib.tex @@ -4,6 +4,8 @@ \indexii{World-Wide}{Web} \index{URL} +\renewcommand{\indexsubitem}{(in module urllib)} + This module provides a high-level interface for fetching data across the World-Wide Web. In particular, the \code{urlopen} function is similar to the built-in function \code{open}, but accepts URLs @@ -55,13 +57,13 @@ Letters, digits, and the characters ``\code{_,.-}'' are never quoted. 
The optional \var{addsafe} parameter specifies additional characters that should not be quoted --- its default value is \code{'/'}. -Example: \code{quote('/~conolly/')} yields \code{'/\%7econnolly/'}. +Example: \code{quote('/\~conolly/')} yields \code{'/\%7econnolly/'}. \end{funcdesc} \begin{funcdesc}{unquote}{string} Remove \code{\%xx} escapes by their single-character equivalent. -Example: \code{unquote('/\%7Econnolly/')} yields \code{'/~connolly/'}. +Example: \code{unquote('/\%7Econnolly/')} yields \code{'/\~connolly/'}. \end{funcdesc} Restrictions: diff --git a/Doc/lib/liburlparse.tex b/Doc/lib/liburlparse.tex index 8495437..6fa2a37 100644 --- a/Doc/lib/liburlparse.tex +++ b/Doc/lib/liburlparse.tex @@ -6,6 +6,8 @@ \indexii{URL}{parsing} \indexii{relative}{URL} +\renewcommand{\indexsubitem}{(in module urlparse)} + This module defines a standard interface to break URL strings up in components (addessing scheme, network location, path etc.), to combine the components back into a URL string, and to convert a ``relative diff --git a/Doc/libcgi.tex b/Doc/libcgi.tex index 9d27644..a5d1cdf 100644 --- a/Doc/libcgi.tex +++ b/Doc/libcgi.tex @@ -6,6 +6,8 @@ \indexii{MIME}{headers} \index{URL} +\renewcommand{\indexsubitem}{(in module cgi)} + This module makes it easy to write Python scripts that run in a WWW server using the Common Gateway Interface. It was written by Michael McLay and subsequently modified by Steve Majewski and Guido van @@ -113,7 +115,7 @@ if it is unique, or raise \code{IndexError} if the field was specified more than once in the form. (If the field wasn't specified at all, \code{KeyError} is raised.) To access fields that are specified multiple times, use \code{form.getlist(fieldname)}. The -\code{values()} and \code{items()} methods return mixed lists -- +\code{values()} and \code{items()} methods return mixed lists --- containing strings for singly-defined fields, and lists of strings for multiply-defined fields. 
\end{funcdesc} diff --git a/Doc/libftplib.tex b/Doc/libftplib.tex index 105ccdf..acd8784 100644 --- a/Doc/libftplib.tex +++ b/Doc/libftplib.tex @@ -1,3 +1,6 @@ \section{Built-in module \sectcode{ftplib}} \stmodindex{ftplib} + +\renewcommand{\indexsubitem}{(in module ftplib)} + To be provided. diff --git a/Doc/libfuncs.tex b/Doc/libfuncs.tex index ed2427b..91a9ec9 100644 --- a/Doc/libfuncs.tex +++ b/Doc/libfuncs.tex @@ -120,7 +120,7 @@ exactly one argument.) This function is similar to the \code{eval()} function or the \code{exec} statement, but parses a file instead of a string. It is different from the \code{import} statement in that it does not use - the module administration -- it reads the file unconditionally and + the module administration --- it reads the file unconditionally and does not create a new module. The arguments are a file name and two optional dictionaries. The diff --git a/Doc/libgopherlib.tex b/Doc/libgopherlib.tex index 9b81e37..904a64b 100644 --- a/Doc/libgopherlib.tex +++ b/Doc/libgopherlib.tex @@ -1,3 +1,6 @@ \section{Built-in module \sectcode{gopherlib}} \stmodindex{gopherlib} + +\renewcommand{\indexsubitem}{(in module gopherlib)} + To be provided. diff --git a/Doc/libhtmllib.tex b/Doc/libhtmllib.tex index 9ea10ee..e192774 100644 --- a/Doc/libhtmllib.tex +++ b/Doc/libhtmllib.tex @@ -1,3 +1,271 @@ \section{Built-in module \sectcode{htmllib}} \stmodindex{htmllib} -To be provided. +\index{HTML} +\index{hypertext} + +\renewcommand{\indexsubitem}{(in module htmllib)} + +This module defines a number of classes which can serve as a basis for +parsing text files formatted in HTML (HyperText Mark-up Language). +The classes are not directly concerned with I/O --- they have to be fed +their input in string form, and will make calls to methods of a +``formatter'' object in order to produce output. 
The classes are +designed to be used as base classes for other classes in order to add +functionality, and allow most of their methods to be extended or +overridden. In turn, the classes are derived from and extend the +class \code{SGMLParser} defined in module \code{sgmllib}. +\index{SGML} +\stmodindex{sgmllib} +\ttindex{SGMLParser} +\index{formatter} + +The following is a summary of the interface defined by +\code{sgmllib.SGMLParser}: + +\begin{itemize} + +\item +The interface to feed data to an instance is through the \code{feed()} +method, which takes a string argument. This can be called with as +little or as much text at a time as desired. When the data contains complete +HTML elements, these are processed immediately; incomplete elements +are saved in a buffer. To force processing of all unprocessed data, +call the \code{close()} method. + +Example: to parse the entire contents of a file, do +\code{parser.feed(open(file).read()); parser.close()}. + +\item +The interface to define semantics for HTML tags is very simple: derive +a class and define methods called \code{start_\var{tag}()}, +\code{end_\var{tag}()}, or \code{do_\var{tag}()}. The parser will +call these at appropriate moments: \code{start_\var{tag}} or +\code{do_\var{tag}} is called when an opening tag of the form +\code{<\var{tag} ...>} is encountered; \code{end_\var{tag}} is called +when a closing tag of the form \code{</\var{tag}>} is encountered. If +an opening tag requires a corresponding closing tag, like \code{<H1>} +... \code{</H1>}, the class should define the \code{start_\var{tag}} +method; if a tag requires no closing tag, like \code{<P>}, the class +should define the \code{do_\var{tag}} method. + +\end{itemize} + +The module defines the following classes: + +\begin{funcdesc}{HTMLParser}{} +This is the most basic HTML parser class. It defines one additional +entity name over the names defined by the \code{SGMLParser} base +class, \code{\&bullet;}.
It also defines handlers for the following +tags: \code{<LISTING>...</LISTING>}, \code{<XMP>...</XMP>}, and +\code{<PLAINTEXT>} (the latter is terminated only by end of file). +\end{funcdesc} + +\begin{funcdesc}{CollectingParser}{} +This class, derived from \code{HTMLParser}, collects various useful +bits of information from the HTML text. To this end it defines +additional handlers for the following tags: \code{<A>...</A>}, +\code{<HEAD>...</HEAD>}, \code{<BODY>...</BODY>}, +\code{<TITLE>...</TITLE>}, \code{<NEXTID>}, and \code{<ISINDEX>}. +\end{funcdesc} + +\begin{funcdesc}{FormattingParser}{formatter\, stylesheet} +This class, derived from \code{CollectingParser}, interprets a wide +selection of HTML tags so it can produce formatted output from the +parsed data. It is initialized with two objects, a \var{formatter} +which should define a number of methods to format text into +paragraphs, and a \var{stylesheet} which defines a number of static +parameters for the formatting process. Formatters and style sheets +are documented later in this section. +\index{formatter} +\index{style sheet} +\end{funcdesc} + +\begin{funcdesc}{AnchoringParser}{formatter\, stylesheet} +This class, derived from \code{FormattingParser}, extends the handling +of the \code{<A>...</A>} tag pair to call the formatter's +\code{bgn_anchor()} and \code{end_anchor()} methods. This allows the +formatter to display the anchor in a different font or color, etc. +\end{funcdesc} + +Instances of \code{CollectingParser} (and thus also instances of +\code{FormattingParser} and \code{AnchoringParser}) have the following +instance variables: + +\begin{datadesc}{anchornames} +A list of the values of the \code{NAME} attributes of the \code{<A>} +tags encountered. +\end{datadesc} + +\begin{datadesc}{anchors} +A list of the values of \code{HREF} attributes of the \code{<A>} tags +encountered.
+\end{datadesc} + +\begin{datadesc}{anchortypes} +A list of the values of the \code{TYPE} attributes of the \code{<A>} +tags encountered. +\end{datadesc} + +\begin{datadesc}{inanchor} +Outside an \code{<A>...</A>} tag pair, this is zero. Inside such a +pair, it is a unique integer, which is positive if the anchor has a +\code{HREF} attribute, negative if it hasn't. Its absolute value is +one more than the index of the anchor in the \code{anchors}, +\code{anchornames} and \code{anchortypes} lists. +\end{datadesc} + +\begin{datadesc}{isindex} +True if the \code{<ISINDEX>} tag has been encountered. +\end{datadesc} + +\begin{datadesc}{nextid} +The attribute list of the last \code{<NEXTID>} tag encountered, or +an empty list if none. +\end{datadesc} + +\begin{datadesc}{title} +The text inside the last \code{<TITLE>...</TITLE>} tag pair, or +\code{''} if no title has been encountered yet. +\end{datadesc} + +The \code{anchors}, \code{anchornames} and \code{anchortypes} lists +are ``parallel arrays'': items in these lists with the same index +pertain to the same anchor. Missing attributes default to the empty +string. Anchors with neither a \code{HREF} nor a \code{NAME} +attribute are not entered in these lists at all. + +The module also defines a number of style sheet classes. These should +never be instantiated --- their class variables are the only behaviour +required. Note that style sheets are specifically designed for a +particular formatter implementation. The currently defined style +sheets are: +\index{style sheet} + +\begin{datadesc}{NullStylesheet} +A style sheet for use on a dumb output device such as an ASCII +terminal. +\end{datadesc} + +\begin{datadesc}{X11Stylesheet} +A style sheet for use with an X11 server. +\end{datadesc} + +\begin{datadesc}{MacStylesheet} +A style sheet for use on Apple Macintosh computers.
+\end{datadesc} + +\begin{datadesc}{StdwinStylesheet} +A style sheet for use with the \code{stdwin} module; it is an alias +for either \code{X11Stylesheet} or \code{MacStylesheet}. +\bimodindex{stdwin} +\end{datadesc} + +\begin{datadesc}{GLStylesheet} +A style sheet for use with the SGI Graphics Library and its font +manager (the SGI-specific built-in modules \code{gl} and \code{fm}). +\bimodindex{gl} +\bimodindex{fm} +\end{datadesc} + +Style sheets have the following class variables: + +\begin{datadesc}{stdfontset} +A list of up to four font definitions, respectively for the roman, +italic, bold and constant-width variant of a font for normal text. If +the list contains less than four font definitions, the last item is +used as the default for missing items. The type of a font definition +depends on the formatter in use; its only use is as a parameter to the +formatter's \code{setfont()} method. +\end{datadesc} + +\begin{datadesc}{h1fontset} +\dataline{h2fontset} +\dataline{h3fontset} +The font set used for various headers (text inside \code{<H1>...</H1>} +tag pairs etc.). +\end{datadesc} + +\begin{datadesc}{stdindent} +The indentation of normal text. This is measured in the ``native'' +units of the formatter in use; for some formatters these are +characters, for others (especially those that actually support +variable-spacing fonts) in pixels or printer points. +\end{datadesc} + +\begin{datadesc}{ddindent} +The indentation used for the first level of \code{<DD>} tags. +\end{datadesc} + +\begin{datadesc}{ulindent} +The indentation used for the first level of \code{<UL>} tags. +\end{datadesc} + +\begin{datadesc}{h1indent} +The indentation used for level 1 headers. +\end{datadesc} + +\begin{datadesc}{h2indent} +The indentation used for level 2 headers. +\end{datadesc} + +\begin{datadesc}{literalindent} +The indentation used for literal text (text inside +\code{<PRE>...</PRE>} and similar tag pairs).
+\end{datadesc} + +Although no documented implementation of a formatter exists, the +\code{FormattingParser} class assumes that formatters have a +certain interface. This interface requires the following methods: +\index{formatter} + +\begin{funcdesc}{setfont}{fontspec} +Set the font to be used subsequently. The \var{fontspec} argument is +an item in a style sheet's font set. +\end{funcdesc} + +\begin{funcdesc}{flush}{} +Finish the current line, if not empty, and begin a new one. +\end{funcdesc} + +\begin{funcdesc}{setleftindent}{n} +Set the left indentation of the following lines to \var{n} units. +\end{funcdesc} + +\begin{funcdesc}{needvspace}{n} +Require at least \var{n} blank lines before the next line. Implies +\code{flush()}. +\end{funcdesc} + +\begin{funcdesc}{addword}{word\, space} +Add a \var{word} to the current paragraph, followed by \var{space} +spaces. +\end{funcdesc} + +\begin{datadesc}{nospace} +If this instance variable is true, empty words are ignored by +\code{addword}. It is set to false after a non-empty word has been +added. +\end{datadesc} + +\begin{funcdesc}{setjust}{justification} +Set the justification of the current paragraph. The +\var{justification} can be \code{'c'} (center), \code{'l'} (left +justified), \code{'r'} (right justified) or \code{'lr'} (left and +right justified). +\end{funcdesc} + +\begin{funcdesc}{bgn_anchor}{id} +Begin an anchor. The \var{id} parameter is the value of the parser's +\code{inanchor} attribute. +\end{funcdesc} + +\begin{funcdesc}{end_anchor}{id} +End an anchor. The \var{id} parameter is the value of the parser's +\code{inanchor} attribute. +\end{funcdesc} + +A sample formatter implementation can be found in the module +\code{fmt}, which in turn uses the module \code{Para}.
These are +currently not intended as a +\ttindex{fmt} +\ttindex{Para} diff --git a/Doc/libhttplib.tex b/Doc/libhttplib.tex index a284faa..e36bba4 100644 --- a/Doc/libhttplib.tex +++ b/Doc/libhttplib.tex @@ -2,6 +2,8 @@ \stmodindex{httplib} \index{HTTP} +\renewcommand{\indexsubitem}{(in module httplib)} + This module defines a class which implements the client side of the HTTP protocol. It is normally not used directly --- the module \code{urlllib} module uses it to handle URLs that use HTTP. diff --git a/Doc/libimp.tex b/Doc/libimp.tex index 1a313fa..befde61 100644 --- a/Doc/libimp.tex +++ b/Doc/libimp.tex @@ -38,7 +38,7 @@ returned by \code{get_suffixes} describing the kind of file found. \begin{funcdesc}{init_builtin}{name} Initialize the built-in module called \var{name} and return its module object. If the module was already initialized, it will be initialized -{\em again}. A few modules cannot be initialized twice -- attempting +{\em again}. A few modules cannot be initialized twice --- attempting to initialize these again will raise an exception. If there is no built-in module called \var{name}, \code{None} is returned. \end{funcdesc} @@ -73,7 +73,7 @@ it will be initialized {\em again}. The \var{name} argument is used to create or access a module object. The \var{pathname} argument points to the byte-compiled code file. The optional \var{file} argument is the byte-compiled code file, open for reading in binary -mode, from the beginning -- if not given, the function opens +mode, from the beginning --- if not given, the function opens \var{pathname}. It must currently be a real file object, not a user-defined class emulating a file. \end{funcdesc} @@ -97,7 +97,7 @@ return its module object. If the module was already initialized, it will be initialized {\em again}. The \var{name} argument is used to create or access a module object. The \var{pathname} argument points to the source file. 
The optional \var{file} argument is the source -file, open for reading as text, from the beginning -- if not given, +file, open for reading as text, from the beginning --- if not given, the function opens \var{pathname}. It must currently be a real file object, not a user-defined class emulating a file. Note that if a properly matching byte-compiled file (with suffix \code{.pyc}) exists, diff --git a/Doc/libmac.tex b/Doc/libmac.tex index 77c8956..9a6ccd9 100644 --- a/Doc/libmac.tex +++ b/Doc/libmac.tex @@ -1,6 +1,6 @@ \chapter{MACINTOSH ONLY} -The following modules are available on the Apple Macintosh only. +The modules in this chapter are available on the Apple Macintosh only. \section{Built-in module \sectcode{mac}} @@ -38,3 +38,7 @@ The following functions are available in this module: \code{isdir}, \code{isfile}, \code{exists}. + +\input{libmacconsole} +\input{libmacfs} +\input{libmacspeech} diff --git a/Doc/libmacconsole.tex b/Doc/libmacconsole.tex index 1a09e6b..0be429b 100644 --- a/Doc/libmacconsole.tex +++ b/Doc/libmacconsole.tex @@ -1,6 +1,8 @@ \section{Built-in module \sectcode{macconsole}} \bimodindex{macconsole} +\renewcommand{\indexsubitem}{(in module macconsole)} + This module is available on the Macintosh, provided Python has been built using the Think C compiler. It provides an interface to the Think console package, with which basic text windows can be created. diff --git a/Doc/libmacfs.tex b/Doc/libmacfs.tex index 86d108a..bd22498 100644 --- a/Doc/libmacfs.tex +++ b/Doc/libmacfs.tex @@ -1,6 +1,8 @@ \section{Built-in module \sectcode{macfs}} \bimodindex{macfs} +\renewcommand{\indexsubitem}{(in module macfs)} + This module provides access to macintosh FSSpec handling, the Alias Manager, finder aliases and the Standard File package. 
diff --git a/Doc/libmacspeech.tex b/Doc/libmacspeech.tex index 7c71c44..0c1d88c 100644 --- a/Doc/libmacspeech.tex +++ b/Doc/libmacspeech.tex @@ -1,6 +1,8 @@ \section{Built-in module \sectcode{macspeech}} \bimodindex{macspeech} +\renewcommand{\indexsubitem}{(in module macspeech)} + This module provides an interface to the Macintosh Speech Manager, allowing you to let the macintosh utter phrases. You need a version of the speech manager extension (version 1 and 2 have been tested) in diff --git a/Doc/libmimetools.tex b/Doc/libmimetools.tex index c32224b..0d1a81b 100644 --- a/Doc/libmimetools.tex +++ b/Doc/libmimetools.tex @@ -1,3 +1,6 @@ \section{Built-in module \sectcode{mimetools}} \stmodindex{mimetools} + +\renewcommand{\indexsubitem}{(in module mimetools)} + To be provided. diff --git a/Doc/libnntplib.tex b/Doc/libnntplib.tex index 93e7ed1..6aac671 100644 --- a/Doc/libnntplib.tex +++ b/Doc/libnntplib.tex @@ -1,3 +1,6 @@ \section{Built-in module \sectcode{nntplib}} \stmodindex{nntplib} + +\renewcommand{\indexsubitem}{(in module nntplib)} + To be provided. diff --git a/Doc/librfc822.tex b/Doc/librfc822.tex index 43a5cea..641ea85 100644 --- a/Doc/librfc822.tex +++ b/Doc/librfc822.tex @@ -1,6 +1,8 @@ \section{Built-in module \sectcode{rfc822}} \stmodindex{rfc822} +\renewcommand{\indexsubitem}{(in module rfc822)} + This module defines a class, \code{Message}, which represents a collection of ``email headers'' as defined by the Internet standard RFC 822. It is used in various contexts, usually to read such headers diff --git a/Doc/libsgmllib.tex b/Doc/libsgmllib.tex index 03d9ba2..29e26c2 100644 --- a/Doc/libsgmllib.tex +++ b/Doc/libsgmllib.tex @@ -1,3 +1,148 @@ \section{Built-in module \sectcode{sgmllib}} \stmodindex{sgmllib} -To be provided. +\index{SGML} + +\renewcommand{\indexsubitem}{(in module sgmllib)} + +This module defines a class \code{SGMLParser} which serves as the +basis for parsing text files formatted in SGML (Standard Generalized +Mark-up Language). 
In fact, it does not provide a full SGML parser +--- it only parses SGML insofar as it is used by HTML, and the module only +exists as a basis for the \code{htmllib} module. +\stmodindex{htmllib} + +In particular, the parser is hardcoded to recognize the following +elements: + +\begin{itemize} + +\item +Opening and closing tags of the form +``\code{<\var{tag} \var{attr}="\var{value}" ...>}'' and +``\code{</\var{tag}>}'', respectively. + +\item +Character references of the form ``\code{\&\#\var{name};}''. + +\item +Entity references of the form ``\code{\&\var{name};}''. + +\item +SGML comments of the form ``\code{<!--\var{text}-->}''. + +\end{itemize} + +The \code{SGMLParser} class must be instantiated without arguments. +It has the following interface methods: + +\begin{funcdesc}{reset}{} +Reset the instance. Loses all unprocessed data. This is called +implicitly at instantiation time. +\end{funcdesc} + +\begin{funcdesc}{setnomoretags}{} +Stop processing tags. Treat all following input as literal input +(CDATA). (This is only provided so the HTML tag \code{<PLAINTEXT>} +can be implemented.) +\end{funcdesc} + +\begin{funcdesc}{setliteral}{} +Enter literal mode (CDATA mode). +\end{funcdesc} + +\begin{funcdesc}{feed}{data} +Feed some text to the parser. It is processed insofar as it consists +of complete elements; incomplete data is buffered until more data is +fed or \code{close()} is called. +\end{funcdesc} + +\begin{funcdesc}{close}{} +Force processing of all buffered data as if it were followed by an +end-of-file mark. This method may be redefined by a derived class to +define additional processing at the end of the input, but the +redefined version should always call \code{SGMLParser.close()}. +\end{funcdesc} + +\begin{funcdesc}{handle_charref}{ref} +This method is called to process a character reference of the form +``\code{\&\#\var{ref};}'' where \var{ref} is a decimal number in the +range 0--255.
It translates the character to ASCII and calls the +method \code{handle_data()} with the character as argument. If +\var{ref} is invalid or out of range, the method +\code{unknown_charref(\var{ref})} is called instead. +\end{funcdesc} + +\begin{funcdesc}{handle_entityref}{ref} +This method is called to process an entity reference of the form +``\code{\&\var{ref};}'' where \var{ref} is an alphabetic entity +reference. It looks for \var{ref} in the instance (or class) +variable \code{entitydefs} which should give the entity's translation. +If a translation is found, it calls the method \code{handle_data()} +with the translation; otherwise, it calls the method +\code{unknown_entityref(\var{ref})}. +\end{funcdesc} + +\begin{funcdesc}{handle_data}{data} +This method is called to process arbitrary data. It is intended to be +overridden by a derived class; the base class implementation does +nothing. +\end{funcdesc} + +\begin{funcdesc}{unknown_starttag}{tag\, attributes} +This method is called to process an unknown start tag. It is intended +to be overridden by a derived class; the base class implementation +does nothing. The \var{attributes} argument is a list of +(\var{name}, \var{value}) pairs containing the attributes found inside +the tag's \code{<>} brackets. The \var{name} has been translated to +lower case and double quotes and backslashes in the \var{value} have +been interpreted. For instance, for the tag +\code{<A HREF="http://www.cwi.nl/">}, this method would be +called as \code{unknown_starttag('a', [('href', 'http://www.cwi.nl/')])}. +\end{funcdesc} + +\begin{funcdesc}{unknown_endtag}{tag} +This method is called to process an unknown end tag. It is intended +to be overridden by a derived class; the base class implementation +does nothing. +\end{funcdesc} + +\begin{funcdesc}{unknown_charref}{ref} +This method is called to process an unknown character reference. It +is intended to be overridden by a derived class; the base class +implementation does nothing.
+\end{funcdesc} + +\begin{funcdesc}{unknown_entityref}{ref} +This method is called to process an unknown entity reference. It is +intended to be overridden by a derived class; the base class +implementation does nothing. +\end{funcdesc} + +Apart from overriding or extending the methods listed above, derived +classes may also define methods of the following form to define +processing of specific tags. Tag names in the input stream are case +independent; the \var{tag} occurring in method names must be in lower +case: + +\begin{funcdesc}{start_\var{tag}}{attributes} +This method is called to process an opening tag \var{tag}. It has +preference over \code{do_\var{tag}()}. The \var{attributes} argument +has the same meaning as described for \code{unknown_starttag()} above. +\end{funcdesc} + +\begin{funcdesc}{do_\var{tag}}{attributes} +This method is called to process an opening tag \var{tag} that does +not come with a matching closing tag. The \var{attributes} argument +has the same meaning as described for \code{unknown_starttag()} above. +\end{funcdesc} + +\begin{funcdesc}{end_\var{tag}}{} +This method is called to process a closing tag \var{tag}. +\end{funcdesc} + +Note that the parser maintains a stack of opening tags for which no +matching closing tag has been found yet. Only tags processed by +\code{start_\var{tag}()} are pushed on this stack. Definition of an +\code{end_\var{tag}()} method is optional for these tags. For tags +processed by \code{do_\var{tag}()} or by \code{unknown_starttag()}, no +\code{end_\var{tag}()} method must be defined. diff --git a/Doc/libsocket.tex b/Doc/libsocket.tex index 17fb314..25658d9 100644 --- a/Doc/libsocket.tex +++ b/Doc/libsocket.tex @@ -152,7 +152,7 @@ to the socket on the other end of the connection. \begin{funcdesc}{bind}{address} Bind the socket to \var{address}. The socket must not already be bound. -(The format of \var{address} depends on the address family -- see above.)
+(The format of \var{address} depends on the address family --- see above.) \end{funcdesc} \begin{funcdesc}{close}{} @@ -163,7 +163,7 @@ Sockets are automatically closed when they are garbage-collected. \begin{funcdesc}{connect}{address} Connect to a remote socket at \var{address}. -(The format of \var{address} depends on the address family -- see above.) +(The format of \var{address} depends on the address family --- see above.) \end{funcdesc} \begin{funcdesc}{fileno}{} @@ -174,14 +174,14 @@ with \code{select}. \begin{funcdesc}{getpeername}{} Return the remote address to which the socket is connected. This is useful to find out the port number of a remote IP socket, for instance. -(The format of the address returned depends on the address family -- +(The format of the address returned depends on the address family --- see above.) On some systems this function is not supported. \end{funcdesc} \begin{funcdesc}{getsockname}{} Return the socket's own address. This is useful to find out the port number of an IP socket, for instance. -(The format of the address returned depends on the address family -- +(The format of the address returned depends on the address family --- see above.) \end{funcdesc} @@ -224,7 +224,7 @@ Receive data from the socket. The return value is a pair \code{(\var{string}, \var{address})} where \var{string} is a string representing the data received and \var{address} is the address of the socket sending the data. -(The format of \var{address} depends on the address family -- see above.) +(The format of \var{address} depends on the address family --- see above.) \end{funcdesc} \begin{funcdesc}{send}{string} @@ -236,7 +236,7 @@ socket. Return the number of bytes sent. Send data to the socket. The socket should not be connected to a remote socket, since the destination socket is specified by \code{address}. Return the number of bytes sent. -(The format of \var{address} depends on the address family -- see above.) 
+(The format of \var{address} depends on the address family --- see above.) \end{funcdesc} \begin{funcdesc}{setblocking}{flag} diff --git a/Doc/libstring.tex b/Doc/libstring.tex index 1c4e90d..7628a10 100644 --- a/Doc/libstring.tex +++ b/Doc/libstring.tex @@ -23,7 +23,7 @@ The constants are: \begin{datadesc}{lowercase} A string containing all the characters that are considered lowercase letters. On most systems this is the string - \code{'abcdefghijklmnopqrstuvwxyz'}. Do not change its definition -- + \code{'abcdefghijklmnopqrstuvwxyz'}. Do not change its definition --- the effect on the routines \code{upper} and \code{swapcase} is undefined. \end{datadesc} @@ -35,7 +35,7 @@ The constants are: \begin{datadesc}{uppercase} A string containing all the characters that are considered uppercase letters. On most systems this is the string - \code{'ABCDEFGHIJKLMNOPQRSTUVWXYZ'}. Do not change its definition -- + \code{'ABCDEFGHIJKLMNOPQRSTUVWXYZ'}. Do not change its definition --- the effect on the routines \code{lower} and \code{swapcase} is undefined. \end{datadesc} @@ -43,7 +43,7 @@ The constants are: \begin{datadesc}{whitespace} A string containing all characters that are considered whitespace. On most systems this includes the characters space, tab, linefeed, - return, formfeed, and vertical tab. Do not change its definition -- + return, formfeed, and vertical tab. Do not change its definition --- the effect on the routines \code{strip} and \code{split} is undefined. \end{datadesc} diff --git a/Doc/liburllib.tex b/Doc/liburllib.tex index 689a5c9..a6000a7 100644 --- a/Doc/liburllib.tex +++ b/Doc/liburllib.tex @@ -4,6 +4,8 @@ \indexii{World-Wide}{Web} \index{URL} +\renewcommand{\indexsubitem}{(in module urllib)} + This module provides a high-level interface for fetching data across the World-Wide Web. 
In particular, the \code{urlopen} function is similar to the built-in function \code{open}, but accepts URLs @@ -55,13 +57,13 @@ Letters, digits, and the characters ``\code{_,.-}'' are never quoted. The optional \var{addsafe} parameter specifies additional characters that should not be quoted --- its default value is \code{'/'}. -Example: \code{quote('/~conolly/')} yields \code{'/\%7econnolly/'}. +Example: \code{quote('/\~conolly/')} yields \code{'/\%7econnolly/'}. \end{funcdesc} \begin{funcdesc}{unquote}{string} Remove \code{\%xx} escapes by their single-character equivalent. -Example: \code{unquote('/\%7Econnolly/')} yields \code{'/~connolly/'}. +Example: \code{unquote('/\%7Econnolly/')} yields \code{'/\~connolly/'}. \end{funcdesc} Restrictions: diff --git a/Doc/liburlparse.tex b/Doc/liburlparse.tex index 8495437..6fa2a37 100644 --- a/Doc/liburlparse.tex +++ b/Doc/liburlparse.tex @@ -6,6 +6,8 @@ \indexii{URL}{parsing} \indexii{relative}{URL} +\renewcommand{\indexsubitem}{(in module urlparse)} + This module defines a standard interface to break URL strings up in components (addessing scheme, network location, path etc.), to combine the components back into a URL string, and to convert a ``relative diff --git a/Doc/mac/libmac.tex b/Doc/mac/libmac.tex index 77c8956..9a6ccd9 100644 --- a/Doc/mac/libmac.tex +++ b/Doc/mac/libmac.tex @@ -1,6 +1,6 @@ \chapter{MACINTOSH ONLY} -The following modules are available on the Apple Macintosh only. +The modules in this chapter are available on the Apple Macintosh only. \section{Built-in module \sectcode{mac}} @@ -38,3 +38,7 @@ The following functions are available in this module: \code{isdir}, \code{isfile}, \code{exists}. 
+ +\input{libmacconsole} +\input{libmacfs} +\input{libmacspeech} diff --git a/Doc/mac/libmacconsole.tex b/Doc/mac/libmacconsole.tex index 1a09e6b..0be429b 100644 --- a/Doc/mac/libmacconsole.tex +++ b/Doc/mac/libmacconsole.tex @@ -1,6 +1,8 @@ \section{Built-in module \sectcode{macconsole}} \bimodindex{macconsole} +\renewcommand{\indexsubitem}{(in module macconsole)} + This module is available on the Macintosh, provided Python has been built using the Think C compiler. It provides an interface to the Think console package, with which basic text windows can be created. diff --git a/Doc/mac/libmacfs.tex b/Doc/mac/libmacfs.tex index 86d108a..bd22498 100644 --- a/Doc/mac/libmacfs.tex +++ b/Doc/mac/libmacfs.tex @@ -1,6 +1,8 @@ \section{Built-in module \sectcode{macfs}} \bimodindex{macfs} +\renewcommand{\indexsubitem}{(in module macfs)} + This module provides access to macintosh FSSpec handling, the Alias Manager, finder aliases and the Standard File package. diff --git a/Doc/mac/libmacspeech.tex b/Doc/mac/libmacspeech.tex index 7c71c44..0c1d88c 100644 --- a/Doc/mac/libmacspeech.tex +++ b/Doc/mac/libmacspeech.tex @@ -1,6 +1,8 @@ \section{Built-in module \sectcode{macspeech}} \bimodindex{macspeech} +\renewcommand{\indexsubitem}{(in module macspeech)} + This module provides an interface to the Macintosh Speech Manager, allowing you to let the macintosh utter phrases. You need a version of the speech manager extension (version 1 and 2 have been tested) in diff --git a/Doc/ref/ref3.tex b/Doc/ref/ref3.tex index 67848bb8..35b4ecf 100644 --- a/Doc/ref/ref3.tex +++ b/Doc/ref/ref3.tex @@ -374,7 +374,7 @@ Class objects are described below. When a class object is called as a function, a new class instance (also described below) is created and returned. This implies a call to the class's \verb@__init__@ method if it has one. 
Any arguments are passed on to the \verb@__init__@ -method -- if there is \verb@__init__@ method, the class must be called +method --- if there is no \verb@__init__@ method, the class must be called without arguments. \ttindex{__init__} \obindex{class} @@ -617,7 +617,7 @@ reference is deleted. It is not guaranteed that \code{__del__} methods are called for objects that still exist when the interpreter exits. -Note that \code{del x} doesn't directly call \code{x.__del__} -- the +Note that \code{del x} doesn't directly call \code{x.__del__} --- the former decrements the reference count for \code{x} by one, but \code{x,__del__} is only called when its reference count reaches zero. @@ -694,7 +694,7 @@ attribute). \code{name} is the attribute name, \code{value} is the value to be assigned to it. If \code{__setattr__} wants to assign to an instance attribute, it -should not simply execute \code{self.\var{name} = value} -- this would +should not simply execute \code{self.\var{name} = value} --- this would cause a recursive call. Instead, it should insert the value in the dictionary of instance attributes, e.g. \code{self.__dict__[name] = value}. diff --git a/Doc/ref/ref4.tex b/Doc/ref/ref4.tex index 0198117..2f2e1c8 100644 --- a/Doc/ref/ref4.tex +++ b/Doc/ref/ref4.tex @@ -82,7 +82,7 @@ The following table lists the meaning of the local and global name space for various types of code blocks. The name space for a particular module is automatically created when the module is first referenced. Note that in almost all cases, the global name space is -the name space of the containing module -- scopes in Python do not +the name space of the containing module --- scopes in Python do not nest! \begin{center} diff --git a/Doc/ref3.tex b/Doc/ref3.tex index 67848bb8..35b4ecf 100644 --- a/Doc/ref3.tex +++ b/Doc/ref3.tex @@ -374,7 +374,7 @@ Class objects are described below. When a class object is called as a function, a new class instance (also described below) is created and returned.
This implies a call to the class's \verb@__init__@ method if it has one. Any arguments are passed on to the \verb@__init__@ -method -- if there is \verb@__init__@ method, the class must be called +method --- if there is no \verb@__init__@ method, the class must be called without arguments. \ttindex{__init__} \obindex{class} @@ -617,7 +617,7 @@ reference is deleted. It is not guaranteed that \code{__del__} methods are called for objects that still exist when the interpreter exits. -Note that \code{del x} doesn't directly call \code{x.__del__} -- the +Note that \code{del x} doesn't directly call \code{x.__del__} --- the former decrements the reference count for \code{x} by one, but \code{x,__del__} is only called when its reference count reaches zero. @@ -694,7 +694,7 @@ attribute). \code{name} is the attribute name, \code{value} is the value to be assigned to it. If \code{__setattr__} wants to assign to an instance attribute, it -should not simply execute \code{self.\var{name} = value} -- this would +should not simply execute \code{self.\var{name} = value} --- this would cause a recursive call. Instead, it should insert the value in the dictionary of instance attributes, e.g. \code{self.__dict__[name] = value}. diff --git a/Doc/ref4.tex b/Doc/ref4.tex index 0198117..2f2e1c8 100644 --- a/Doc/ref4.tex +++ b/Doc/ref4.tex @@ -82,7 +82,7 @@ The following table lists the meaning of the local and global name space for various types of code blocks. The name space for a particular module is automatically created when the module is first referenced. Note that in almost all cases, the global name space is -the name space of the containing module -- scopes in Python do not +the name space of the containing module --- scopes in Python do not nest! \begin{center} diff --git a/Doc/tut.tex b/Doc/tut.tex index ffe03d0..2e11352 100644 --- a/Doc/tut.tex +++ b/Doc/tut.tex @@ -2965,7 +2965,7 @@ Reference for a full description.
\section{Generalized Dictionaries} -The keys of dictionaries are no longer restricted to strings -- they +The keys of dictionaries are no longer restricted to strings --- they can be any immutable basic type including strings, numbers, tuples, or (certain) class instances. (Lists and dictionaries are not acceptable as dictionary keys, in order to avoid problems when the object used as @@ -3097,7 +3097,7 @@ You can define three new ``magic'' methods in a class now: and \code{__delattr__(self, name)}. The \code{__getattr__} method is called when an attribute access fails, -i.e. when an attribute access would otherwise raise AttributeError -- +i.e. when an attribute access would otherwise raise AttributeError --- this is {\em after} the instance's dictionary and its class hierarchy have been searched for the named attribute. Note that if this method attempts to access any undefined instance attribute it will be called @@ -3108,7 +3108,7 @@ assignment to, respectively deletion of an attribute are attempted. They are called {\em instead} of the normal action (which is to insert or delete the attribute in the instance dictionary). If either of these methods must set or delete any attribute, they can only do so by -using the instance dictionary directly -- \code{self.__dict__} -- else +using the instance dictionary directly --- \code{self.__dict__} --- else they would be called recursively. For example, here's a near-universal ``Wrapper'' class that passes all @@ -3504,9 +3504,9 @@ A rudimentary parser for HTML files is available in the module \code{htmllib}. It currently supports a subset of HTML 1.0 (if you bring it up to date, I'd love to receive your fixes!). Unfortunately Python seems to be too slow for real-time parsing and formatting of -HTML such as required by interactive WWW browsers --- but it's ideal -for writing a ``robot'' (an automated WWW browser that searches the -web for information).
+HTML such as required by interactive WWW browsers --- but it's good +enough to write a ``robot'' (an automated WWW browser that searches +the web for information). \section{Miscellaneous} diff --git a/Doc/tut/tut.tex b/Doc/tut/tut.tex index ffe03d0..2e11352 100644 --- a/Doc/tut/tut.tex +++ b/Doc/tut/tut.tex @@ -2965,7 +2965,7 @@ Reference for a full description. \section{Generalized Dictionaries} -The keys of dictionaries are no longer restricted to strings -- they +The keys of dictionaries are no longer restricted to strings --- they can be any immutable basic type including strings, numbers, tuples, or (certain) class instances. (Lists and dictionaries are not acceptable as dictionary keys, in order to avoid problems when the object used as @@ -3097,7 +3097,7 @@ You can define three new ``magic'' methods in a class now: and \code{__delattr__(self, name)}. The \code{__getattr__} method is called when an attribute access fails, -i.e. when an attribute access would otherwise raise AttributeError -- +i.e. when an attribute access would otherwise raise AttributeError --- this is {\em after} the instance's dictionary and its class hierarchy have been searched for the named attribute. Note that if this method attempts to access any undefined instance attribute it will be called @@ -3108,7 +3108,7 @@ assignment to, respectively deletion of an attribute are attempted. They are called {\em instead} of the normal action (which is to insert or delete the attribute in the instance dictionary). If either of these methods must set or delete any attribute, they can only do so by -using the instance dictionary directly -- \code{self.__dict__} -- else +using the instance dictionary directly --- \code{self.__dict__} --- else they would be called recursively. For example, here's a near-universal ``Wrapper'' class that passes all @@ -3504,9 +3504,9 @@ A rudimentary parser for HTML files is available in the module \code{htmllib}.
It currently supports a subset of HTML 1.0 (if you bring it up to date, I'd love to receive your fixes!). Unfortunately Python seems to be too slow for real-time parsing and formatting of -HTML such as required by interactive WWW browsers --- but it's ideal -for writing a ``robot'' (an automated WWW browser that searches the -web for information). +HTML such as required by interactive WWW browsers --- but it's good +enough to write a ``robot'' (an automated WWW browser that searches +the web for information). \section{Miscellaneous} -- cgit v0.12