From a12ef9433bafc0507f1b37e19982a0af5eefc8dd Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 27 Feb 1995 17:53:25 +0000 Subject: added WWW sections --- Doc/Makefile | 20 +++++--- Doc/lib.tex | 2 + Doc/lib/lib.tex | 2 + Doc/lib/libcgi.tex | 130 +++++++++++++++++++++++++++++++++++++++++++++++ Doc/lib/libftplib.tex | 3 ++ Doc/lib/libgopherlib.tex | 3 ++ Doc/lib/libhtmllib.tex | 3 ++ Doc/lib/libhttplib.tex | 93 +++++++++++++++++++++++++++++++++ Doc/lib/libmimetools.tex | 3 ++ Doc/lib/libnntplib.tex | 3 ++ Doc/lib/librfc822.tex | 108 +++++++++++++++++++++++++++++++++++++++ Doc/lib/libsgmllib.tex | 3 ++ Doc/lib/libtypes2.tex | 120 +++++++++++++++++++++++++++++++++++++++++++ Doc/lib/liburlparse.tex | 68 +++++++++++++++++++++++++ Doc/libcgi.tex | 130 +++++++++++++++++++++++++++++++++++++++++++++++ Doc/libftplib.tex | 3 ++ Doc/libgopherlib.tex | 3 ++ Doc/libhtmllib.tex | 3 ++ Doc/libhttplib.tex | 93 +++++++++++++++++++++++++++++++++ Doc/libmimetools.tex | 3 ++ Doc/libnntplib.tex | 3 ++ Doc/librfc822.tex | 108 +++++++++++++++++++++++++++++++++++++++ Doc/libsgmllib.tex | 3 ++ Doc/libtypes2.tex | 120 +++++++++++++++++++++++++++++++++++++++++++ Doc/liburlparse.tex | 68 +++++++++++++++++++++++++ 25 files changed, 1090 insertions(+), 8 deletions(-) create mode 100644 Doc/lib/libcgi.tex create mode 100644 Doc/lib/libftplib.tex create mode 100644 Doc/lib/libgopherlib.tex create mode 100644 Doc/lib/libhtmllib.tex create mode 100644 Doc/lib/libhttplib.tex create mode 100644 Doc/lib/libmimetools.tex create mode 100644 Doc/lib/libnntplib.tex create mode 100644 Doc/lib/librfc822.tex create mode 100644 Doc/lib/libsgmllib.tex create mode 100644 Doc/lib/libtypes2.tex create mode 100644 Doc/lib/liburlparse.tex create mode 100644 Doc/libcgi.tex create mode 100644 Doc/libftplib.tex create mode 100644 Doc/libgopherlib.tex create mode 100644 Doc/libhtmllib.tex create mode 100644 Doc/libhttplib.tex create mode 100644 Doc/libmimetools.tex create mode 100644 Doc/libnntplib.tex create 
mode 100644 Doc/librfc822.tex create mode 100644 Doc/libsgmllib.tex create mode 100644 Doc/libtypes2.tex create mode 100644 Doc/liburlparse.tex diff --git a/Doc/Makefile b/Doc/Makefile index aef0fa5..a717fc2 100644 --- a/Doc/Makefile +++ b/Doc/Makefile @@ -32,23 +32,27 @@ ref.dvi: ref.tex ref1.tex ref2.tex ref3.tex ref4.tex ref5.tex ref6.tex \ LIBFILES = lib.tex \ libal.tex libaifc.tex libamoeba.tex libarray.tex libaudio.tex libaudioop.tex \ libbltin.tex \ -libcopy.tex libcrypto.tex \ +libcgi.tex libcopy.tex libcrypto.tex \ libdbm.tex \ libexcs.tex \ -libfcntl.tex libfl.tex libfm.tex libfuncs.tex \ -libgdbm.tex libgetopt.tex libgl.tex libgrp.tex \ +libfcntl.tex libfl.tex libfm.tex libftplib.tex libfuncs.tex \ +libgdbm.tex libgetopt.tex libgl.tex libgopherlib.tex libgrp.tex \ +libhtmllib.tex libhttplib.tex \ libimageop.tex libimgfile.tex libintro.tex \ libjpeg.tex \ libmac.tex libmain.tex libmarshal.tex libmath.tex \ - libmd5.tex libmm.tex libmods.tex libmpz.tex \ + libmd5.tex libmimetools.tex libmm.tex libmods.tex libmpz.tex \ +libnntplib.tex \ libobjs.tex libos.tex \ libpanel.tex libposix.tex libposixfile.tex libppath.tex libpickle.tex \ libpwd.tex \ -librand.tex libregex.tex libregsub.tex librgbimg.tex librotor.tex \ -libselect.tex libsgi.tex libshelve.tex libsocket.tex libstd.tex libstdwin.tex \ +librand.tex libregex.tex libregsub.tex \ + librfc822.tex librgbimg.tex librotor.tex \ +libselect.tex libsgi.tex libsgmllib.tex \ + libshelve.tex libsocket.tex libstd.tex libstdwin.tex \ libstring.tex libstruct.tex libsun.tex libsys.tex \ -libthread.tex libtime.tex libtypes.tex \ -libunix.tex \ +libthread.tex libtime.tex libtypes.tex libtypes2.tex \ +libunix.tex liburllib.tex liburlparse.tex \ libwhrandom.tex libwww.tex lib.dvi: $(LIBFILES) diff --git a/Doc/lib.tex b/Doc/lib.tex index 1396181..07f70cf 100644 --- a/Doc/lib.tex +++ b/Doc/lib.tex @@ -70,6 +70,7 @@ language. 
\input{libpickle} \input{libshelve} \input{libcopy} +\input{libtypes2} % types is already taken :-( \input{libunix} % UNIX ONLY \input{libdbm} @@ -86,6 +87,7 @@ language. \input{libthread} \input{libwww} % WWW EXTENSIONS +\input{libcgi} \input{libftplib} \input{libgopherlib} \input{libhtmllib} diff --git a/Doc/lib/lib.tex b/Doc/lib/lib.tex index 1396181..07f70cf 100644 --- a/Doc/lib/lib.tex +++ b/Doc/lib/lib.tex @@ -70,6 +70,7 @@ language. \input{libpickle} \input{libshelve} \input{libcopy} +\input{libtypes2} % types is already taken :-( \input{libunix} % UNIX ONLY \input{libdbm} @@ -86,6 +87,7 @@ language. \input{libthread} \input{libwww} % WWW EXTENSIONS +\input{libcgi} \input{libftplib} \input{libgopherlib} \input{libhtmllib} diff --git a/Doc/lib/libcgi.tex b/Doc/lib/libcgi.tex new file mode 100644 index 0000000..9d27644 --- /dev/null +++ b/Doc/lib/libcgi.tex @@ -0,0 +1,130 @@ +\section{Built-in module \sectcode{cgi}} +\stmodindex{cgi} +\indexii{WWW}{server} +\indexii{CGI}{protocol} +\indexii{HTTP}{protocol} +\indexii{MIME}{headers} +\index{URL} + +This module makes it easy to write Python scripts that run in a WWW +server using the Common Gateway Interface. It was written by Michael +McLay and subsequently modified by Steve Majewski and Guido van +Rossum. + +When a WWW server finds that a URL contains a reference to a file in a +particular subdirectory (usually \code{/cgibin}), it runs the file as +a subprocess. Information about the request such as the full URL, the +originating host etc., is passed to the subprocess in the shell +environment; additional input from the client may be read from +standard input. Standard output from the subprocess is sent back +across the network to the client as the response from the request. +The CGI protocol describes what the environment variables passed to +the subprocess mean and how the output should be formatted. The +official reference documentation for the CGI protocol can be found on +the World-Wide Web at +\code{}. 
The +\code{cgi} module was based on version 1.1 of the protocol and should +also work with version 1.0. + +The \code{cgi} module defines several classes that make it easy to +access the information passed to the subprocess from a Python script; +in particular, it knows how to parse the input sent by an HTML +``form'' using either a POST or a GET request (these are alternatives +for submitting forms in the HTTP protocol). + +The formatting of the output is so trivial that no additional support +is needed. All you need to do is print a minimal set of MIME headers +describing the output format, followed by a blank line and your actual +output. E.g. if you want to generate HTML, your script could start as +follows: + +\begin{verbatim} +# Header -- one or more lines: +print "Content-type: text/html" +# Blank line separating header from body: +print +# Body, in HTML format: +print "The Amazing SPAM Homepage!" +# etc... +\end{verbatim} + +The server will add some header lines of its own, but it won't touch +the output following the header. + +The \code{cgi} module defines the following functions: + +\begin{funcdesc}{parse}{} +Read and parse the form submitted to the script and return a +dictionary containing the form's fields. This should be called at +most once per script invocation, as it may consume standard input (if +the form was submitted through a POST request). The keys in the +resulting dictionary are the field names used in the submission; the +values are {\em lists} of the field values (since field name may be +used multiple times in a single form). As a side effect, it sets +\code{environ['QUERY_STRING']} to the raw query string, if it isn't +already set. +\end{funcdesc} + +\begin{funcdesc}{print_environ_usage}{} +Print a piece of HTML listing the environment variables that may be +set by the CGI protocol. +This is mainly useful when learning about writing CGI scripts. 
+\end{funcdesc} + +\begin{funcdesc}{print_environ}{} +Print a piece of HTML text showing the entire contents of the shell +environment. This is mainly useful when debugging a CGI script. +\end{funcdesc} + +\begin{funcdesc}{print_form}{form} +Print a piece of HTML text showing the contents of the \var{form}. +This is mainly useful when debugging a CGI script. +\end{funcdesc} + +\begin{funcdesc}{escape}{string} +Convert special characters in \var{string} to HTML escapes. In +particular, ``\code{\&}'' is replaced with ``\code{\&amp;}'', +``\code{<}'' is replaced with ``\code{\&lt;}'', and ``\code{>}'' is +replaced with ``\code{\&gt;}''. This is useful when printing (almost) +arbitrary text in an HTML context. Note that for inclusion in quoted +tag attributes (e.g. \code{<A HREF="...">}), some additional +characters would have to be converted --- in particular the string +quote. There is currently no function that does this. +\end{funcdesc} + +The module defines the following classes. Since the base class +initializes itself by calling \code{parse()}, at most one instance of +at most one of these classes should be created per script invocation: + +\begin{funcdesc}{FormContentDict}{} +This class behaves like a (read-only) dictionary and has the same keys +and values as the dictionary returned by \code{parse()} (i.e. each +field name maps to a list of values). Additionally, it initializes +its data member \code{query_string} to the raw query sent from the +server. +\end{funcdesc} + +\begin{funcdesc}{SvFormContentDict}{} +This class, derived from \code{FormContentDict}, is a little more +user-friendly when you are expecting that each field name is only used +once in the form. When you access a particular field (using +\code{form[fieldname]}), it will return the string value of that item +if it is unique, or raise \code{IndexError} if the field was specified +more than once in the form. (If the field wasn't specified at all, +\code{KeyError} is raised.) 
To access fields that are specified +multiple times, use \code{form.getlist(fieldname)}. The +\code{values()} and \code{items()} methods return mixed lists -- +containing strings for singly-defined fields, and lists of strings for +multiply-defined fields. +\end{funcdesc} + +(It currently defines some more classes, but these are experimental +and/or obsolescent, and are thus not documented --- see the source for +more information.) + +The module defines the following variable: + +\begin{datadesc}{environ} +The shell environment, exactly as received from the http server. See +the CGI documentation for a description of the various fields. +\end{datadesc} diff --git a/Doc/lib/libftplib.tex b/Doc/lib/libftplib.tex new file mode 100644 index 0000000..105ccdf --- /dev/null +++ b/Doc/lib/libftplib.tex @@ -0,0 +1,3 @@ +\section{Built-in module \sectcode{ftplib}} +\stmodindex{ftplib} +To be provided. diff --git a/Doc/lib/libgopherlib.tex b/Doc/lib/libgopherlib.tex new file mode 100644 index 0000000..9b81e37 --- /dev/null +++ b/Doc/lib/libgopherlib.tex @@ -0,0 +1,3 @@ +\section{Built-in module \sectcode{gopherlib}} +\stmodindex{gopherlib} +To be provided. diff --git a/Doc/lib/libhtmllib.tex b/Doc/lib/libhtmllib.tex new file mode 100644 index 0000000..9ea10ee --- /dev/null +++ b/Doc/lib/libhtmllib.tex @@ -0,0 +1,3 @@ +\section{Built-in module \sectcode{htmllib}} +\stmodindex{htmllib} +To be provided. diff --git a/Doc/lib/libhttplib.tex b/Doc/lib/libhttplib.tex new file mode 100644 index 0000000..a284faa --- /dev/null +++ b/Doc/lib/libhttplib.tex @@ -0,0 +1,93 @@ +\section{Built-in module \sectcode{httplib}} +\stmodindex{httplib} +\index{HTTP} + +This module defines a class which implements the client side of the +HTTP protocol. It is normally not used directly --- the +\code{urllib} module uses it to handle URLs that use HTTP. +\stmodindex{urllib} + +The module defines one class, \code{HTTP}. An \code{HTTP} instance +represents one transaction with an HTTP server. 
It should be +instantiated passing it a host and optional port number. If no port +number is passed, the port is extracted from the host string if it has +the form \code{host:port}, else the default HTTP port (80) is used. +If no host is passed, no connection is made, and the \code{connect} +method should be used to connect to a server. + +Once an \code{HTTP} instance has been connected to an HTTP server, it +should be used as follows: + +\begin{enumerate} + +\item[1.] Make exactly one call to the \code{putrequest()} method. + +\item[2.] Make zero or more calls to the \code{putheader()} method. + +\item[3.] Call the \code{endheaders()} method (this can be omitted if +step 4. makes no calls). + +\item[4.] Optional calls to the \code{send()} method. + +\item[5.] Call the \code{getreply()} method. + +\item[6.] Call the \code{getfile()} method and read the data off the +file object that it returns. + +\end{enumerate} + +\code{HTTP} instances have the following methods: + +\begin{funcdesc}{set_debuglevel}{level} +Set the debugging level (the amount of debugging output printed). +The default debug level is \code{0}, meaning no debugging output is +printed. +\end{funcdesc} + +\begin{funcdesc}{connect}{host\optional{\, port}} +Connect to the server given by \var{host} and \var{port}. See the +intro for the default port. This should be called directly only if +the instance was instantiated without passing a host. +\end{funcdesc} + +\begin{funcdesc}{send}{data} +Send data to the server. This should be used directly only after the +\code{endheaders()} method has been called and before +\code{getreply()} has been called. +\end{funcdesc} + +\begin{funcdesc}{putrequest}{request\, selector} +This should be the first call after the connection to the server has +been made. It sends a line to the server consisting of the +\var{request} string, the \var{selector} string, and the HTTP version +(\code{HTTP/1.0}). 
+\end{funcdesc} + +\begin{funcdesc}{putheader}{header\, argument\optional{\, ...}} +Send an RFC-822 style header to the server. It sends a line to the +server consisting of the header, a colon and a space, and the first +argument. If more arguments are given, continuation lines are sent, +each consisting of a tab and an argument. +\end{funcdesc} + +\begin{funcdesc}{endheaders}{} +Send a blank line to the server, signalling the end of the headers. +\end{funcdesc} + +\begin{funcdesc}{getreply}{} +Complete the request by shutting down the sending end of the socket, +read the reply from the server, and return a triple (\var{replycode}, +\var{message}, \var{headers}). Here \var{replycode} is the integer +reply code from the request (e.g. \code{200} if the request was +handled properly); \var{message} is the message string corresponding +to the reply code; and \var{headers} is an instance of the class +\code{rfc822.Message} containing the headers received from the server. +See the description of the \code{rfc822} module. +\stmodindex{rfc822} +\end{funcdesc} + +\begin{funcdesc}{getfile}{} +Return a file object from which the data returned by the server can be +read, using the \code{read()}, \code{readline()} or \code{readlines()} +methods. +\end{funcdesc} diff --git a/Doc/lib/libmimetools.tex b/Doc/lib/libmimetools.tex new file mode 100644 index 0000000..c32224b --- /dev/null +++ b/Doc/lib/libmimetools.tex @@ -0,0 +1,3 @@ +\section{Built-in module \sectcode{mimetools}} +\stmodindex{mimetools} +To be provided. diff --git a/Doc/lib/libnntplib.tex b/Doc/lib/libnntplib.tex new file mode 100644 index 0000000..93e7ed1 --- /dev/null +++ b/Doc/lib/libnntplib.tex @@ -0,0 +1,3 @@ +\section{Built-in module \sectcode{nntplib}} +\stmodindex{nntplib} +To be provided. 
diff --git a/Doc/lib/librfc822.tex b/Doc/lib/librfc822.tex new file mode 100644 index 0000000..43a5cea --- /dev/null +++ b/Doc/lib/librfc822.tex @@ -0,0 +1,108 @@ +\section{Built-in module \sectcode{rfc822}} +\stmodindex{rfc822} + +This module defines a class, \code{Message}, which represents a +collection of ``email headers'' as defined by the Internet standard +RFC 822. It is used in various contexts, usually to read such headers +from a file. + +A \code{Message} instance is instantiated with an open file object as +parameter. Instantiation reads headers from the file up to a blank +line and stores them in the instance; after instantiation, the file is +positioned directly after the blank line that terminates the headers. + +Input lines as read from the file may either be terminated by CR-LF or +by a single linefeed; a terminating CR-LF is replaced by a single +linefeed before the line is stored. + +All header matching is done independent of upper or lower case; +e.g. \code{m['From']}, \code{m['from']} and \code{m['FROM']} all yield +the same result. + +A \code{Message} instance has the following methods: + +\begin{funcdesc}{rewindbody}{} +Seek to the start of the message body. This only works if the file +object is seekable. +\end{funcdesc} + +\begin{funcdesc}{getallmatchingheaders}{name} +Return a list of lines consisting of all headers whose header matches +\var{name}, if any. Each physical line, whether it is a continuation +line or not, is a separate list item. Return the empty list if no +header matches \var{name}. +\end{funcdesc} + +\begin{funcdesc}{getfirstmatchingheader}{name} +Return a list of lines comprising the first header matching +\var{name}, and its continuation line(s), if any. Return \code{None} +if there is no header matching \var{name}. +\end{funcdesc} + +\begin{funcdesc}{getrawheader}{name} +Return a single string consisting of the text after the colon in the +first header matching \var{name}. 
This includes leading whitespace, +the trailing linefeed, and internal linefeeds and whitespace if +any continuation line(s) were present. Return \code{None} if there is +no header matching \var{name}. +\end{funcdesc} + +\begin{funcdesc}{getheader}{name} +Like \code{getrawheader(\var{name})}, but strip leading and trailing +whitespace (but not internal whitespace). +\end{funcdesc} + +\begin{funcdesc}{getaddr}{name} +Return a pair (full name, email address) parsed from the string +returned by \code{getheader(\var{name})}. If no header matching +\var{name} exists, return \code{None, None}; otherwise both the full +name and the address are (possibly empty) strings. + +Example: if \code{m}'s first \code{From} header contains the string +\code{'guido@cwi.nl (Guido van Rossum)'}, then +\code{m.getaddr('From')} will yield the pair +\code{('Guido van Rossum', 'guido\@cwi.nl')}. +If the header contained +\code{'Guido van Rossum <guido\@cwi.nl>'} instead, it would yield the +exact same result. +\end{funcdesc} + +\begin{funcdesc}{getaddrlist}{name} +This is similar to \code{getaddr(\var{name})}, but parses a header +containing a list of email addresses (e.g. a \code{To} header) and +returns a list of (full name, email address) pairs (even if there was +only one address in the header). If there is no header matching +\var{name}, return an empty list. + +XXX The current version of this function is not really correct. It +yields bogus results if a full name contains a comma. +\end{funcdesc} + +\begin{funcdesc}{getdate}{name} +Retrieve a header using \code{getheader} and parse it into a 9-tuple +compatible with \code{time.mktime()}. If there is no header matching +\var{name}, or it is unparsable, return \code{None}. + +Date parsing appears to be a black art, and not all mailers adhere to +the standard. While it has been tested and found correct on a large +collection of email from many sources, it is still possible that this +function may occasionally yield an incorrect result. 
+\end{funcdesc} + +\code{Message} instances also support a read-only mapping interface. +In particular: \code{m[name]} is the same as \code{m.getheader(name)}; +and \code{len(m)}, \code{m.has_key(name)}, \code{m.keys()}, +\code{m.values()} and \code{m.items()} act as expected (and +consistently). + +Finally, \code{Message} instances have two public instance variables: + +\begin{datadesc}{headers} +A list containing the entire set of header lines, in the order in +which they were read. Each line contains a trailing newline. The +blank line terminating the headers is not contained in the list. +\end{datadesc} + +\begin{datadesc}{fp} +The file object passed at instantiation time. +\end{datadesc} diff --git a/Doc/lib/libsgmllib.tex b/Doc/lib/libsgmllib.tex new file mode 100644 index 0000000..03d9ba2 --- /dev/null +++ b/Doc/lib/libsgmllib.tex @@ -0,0 +1,3 @@ +\section{Built-in module \sectcode{sgmllib}} +\stmodindex{sgmllib} +To be provided. diff --git a/Doc/lib/libtypes2.tex b/Doc/lib/libtypes2.tex new file mode 100644 index 0000000..7c51bb9 --- /dev/null +++ b/Doc/lib/libtypes2.tex @@ -0,0 +1,120 @@ +\section{Built-in module \sectcode{types}} +\stmodindex{types} + +This module defines names for all object types that are used by the +standard Python interpreter (but not for the types defined by various +extension modules). It is safe to use \code{from types import *} --- +the module does not export any other names besides the ones listed +here. New names exported by future versions of this module will +all end in \code{Type}. + +Typical use is for functions that do different things depending on +their argument types, like the following: + +\begin{verbatim} +from types import * +def delete(list, item): + if type(item) is IntType: + del list[item] + else: + list.remove(item) +\end{verbatim} + +The module defines the following names: + +\begin{datadesc}{NoneType} +The type of \code{None}. 
+\end{datadesc} + +\begin{datadesc}{TypeType} +The type of type objects (such as returned by \code{type()}). +\end{datadesc} + +\begin{datadesc}{IntType} +The type of integers (e.g. \code{1}). +\end{datadesc} + +\begin{datadesc}{LongType} +The type of long integers (e.g. \code{1L}). +\end{datadesc} + +\begin{datadesc}{FloatType} +The type of floating point numbers (e.g. \code{1.0}). +\end{datadesc} + +\begin{datadesc}{StringType} +The type of character strings (e.g. \code{'Spam'}). +\end{datadesc} + +\begin{datadesc}{TupleType} +The type of tuples (e.g. \code{(1, 2, 3, 'Spam')}). +\end{datadesc} + +\begin{datadesc}{ListType} +The type of lists (e.g. \code{[0, 1, 2, 3]}). +\end{datadesc} + +\begin{datadesc}{DictType} +The type of dictionaries (e.g. \code{\{'Bacon': 1, 'Ham': 0\}}). +\end{datadesc} + +\begin{datadesc}{DictionaryType} +An alternative name for \code{DictType}. +\end{datadesc} + +\begin{datadesc}{FunctionType} +The type of user-defined functions and lambdas. +\end{datadesc} + +\begin{datadesc}{LambdaType} + An alternative name for \code{FunctionType}. +\end{datadesc} + +\begin{datadesc}{CodeType} +The type for code objects such as returned by \code{compile()}. +\end{datadesc} + +\begin{datadesc}{ClassType} +The type of user-defined classes. +\end{datadesc} + +\begin{datadesc}{InstanceType} +The type of instances of user-defined classes. +\end{datadesc} + +\begin{datadesc}{MethodType} +The type of methods of user-defined class instances. +\end{datadesc} + +\begin{datadesc}{UnboundMethodType} +An alternative name for \code{MethodType}. +\end{datadesc} + +\begin{datadesc}{BuiltinFunctionType} +The type of built-in functions like \code{len} or \code{sys.exit}. +\end{datadesc} + +\begin{datadesc}{BuiltinMethodType} +An alternative name for \code{BuiltinFunctionType}. +\end{datadesc} + +\begin{datadesc}{ModuleType} +The type of modules. +\end{datadesc} + +\begin{datadesc}{FileType} +The type of open file objects such as \code{sys.stdout}. 
+\end{datadesc} + +\begin{datadesc}{XRangeType} +The type of range objects returned by \code{xrange()}. +\end{datadesc} + +\begin{datadesc}{TracebackType} +The type of traceback objects such as found in \code{sys.exc_traceback}. +\end{datadesc} + +\begin{datadesc}{FrameType} +The type of frame objects such as found in \code{tb.tb_frame} if +\code{tb} is a traceback object. +\end{datadesc} diff --git a/Doc/lib/liburlparse.tex b/Doc/lib/liburlparse.tex new file mode 100644 index 0000000..8495437 --- /dev/null +++ b/Doc/lib/liburlparse.tex @@ -0,0 +1,68 @@ +\section{Built-in module \sectcode{urlparse}} +\stmodindex{urlparse} +\index{WWW} +\indexii{World-Wide}{Web} +\index{URL} +\indexii{URL}{parsing} +\indexii{relative}{URL} + +This module defines a standard interface to break URL strings up in +components (addressing scheme, network location, path etc.), to combine +the components back into a URL string, and to convert a ``relative +URL'' to an absolute URL given a ``base URL''. + +The module has been designed to match the current Internet draft on +Relative Uniform Resource Locators (and discovered a bug in an earlier +draft!). + +It defines the following functions: + +\begin{funcdesc}{urlparse}{urlstring\optional{\, +default_scheme\optional{\, allow_fragments}}} +Parse a URL into 6 components, returning a 6-tuple: (addressing +scheme, network location, path, parameters, query, fragment +identifier). This corresponds to the general structure of a URL: +\code{\var{scheme}://\var{netloc}/\var{path};\var{parameters}?\var{query}\#\var{fragment}}. +Each tuple item is a string, possibly empty. +The components are not broken up in smaller parts (e.g. the network +location is a single string), and \% escapes are not expanded. +The delimiters as shown above are not part of the tuple items, {\em +except} for a leading slash in the \var{path} component, which is +kept if present. 
+ +Example: +\code{urlparse('http://www.cwi.nl:80/\%7eguido/Python.html')} +yields the tuple +\code{('http', 'www.cwi.nl:80', '/\%7eguido/Python.html', '', '', '')}. + +If the \var{default_scheme} argument is specified, it gives the +default addressing scheme, to be used only if the URL string does not +specify one. The default value for this argument is the empty string. + +If the \var{allow_fragments} argument is zero, fragment identifiers +are not allowed, even if the URL's addressing scheme normally does +support them. The default value for this argument is \code{1}. +\end{funcdesc} + +\begin{funcdesc}{urlunparse}{tuple} +Construct a URL string from a tuple as returned by \code{urlparse}. +This may result in a slightly different, but equivalent URL, if the +URL that was parsed originally had redundant delimiters, e.g. a ? with +an empty query (the draft states that these are equivalent). +\end{funcdesc} + +\begin{funcdesc}{urljoin}{base\, url\optional{\, allow_fragments}} +Construct a full (``absolute'') URL by combining a ``base URL'' +(\var{base}) with a ``relative URL'' (\var{url}). Informally, this +uses components of the base URL, in particular the addressing scheme, +the network location and (part of) the path, to provide missing +components in the relative URL. + +Example: +\code{urljoin('http://www.cwi.nl/\%7eguido/Python.html',} +\code{'FAQ.html')} yields the string +\code{'http://www.cwi.nl/\%7eguido/FAQ.html'}. + +The \var{allow_fragments} argument has the same meaning as for +\code{urlparse}. +\end{funcdesc} diff --git a/Doc/libcgi.tex b/Doc/libcgi.tex new file mode 100644 index 0000000..9d27644 --- /dev/null +++ b/Doc/libcgi.tex @@ -0,0 +1,130 @@ +\section{Built-in module \sectcode{cgi}} +\stmodindex{cgi} +\indexii{WWW}{server} +\indexii{CGI}{protocol} +\indexii{HTTP}{protocol} +\indexii{MIME}{headers} +\index{URL} + +This module makes it easy to write Python scripts that run in a WWW +server using the Common Gateway Interface. 
It was written by Michael +McLay and subsequently modified by Steve Majewski and Guido van +Rossum. + +When a WWW server finds that a URL contains a reference to a file in a +particular subdirectory (usually \code{/cgibin}), it runs the file as +a subprocess. Information about the request such as the full URL, the +originating host etc., is passed to the subprocess in the shell +environment; additional input from the client may be read from +standard input. Standard output from the subprocess is sent back +across the network to the client as the response from the request. +The CGI protocol describes what the environment variables passed to +the subprocess mean and how the output should be formatted. The +official reference documentation for the CGI protocol can be found on +the World-Wide Web at +\code{}. The +\code{cgi} module was based on version 1.1 of the protocol and should +also work with version 1.0. + +The \code{cgi} module defines several classes that make it easy to +access the information passed to the subprocess from a Python script; +in particular, it knows how to parse the input sent by an HTML +``form'' using either a POST or a GET request (these are alternatives +for submitting forms in the HTTP protocol). + +The formatting of the output is so trivial that no additional support +is needed. All you need to do is print a minimal set of MIME headers +describing the output format, followed by a blank line and your actual +output. E.g. if you want to generate HTML, your script could start as +follows: + +\begin{verbatim} +# Header -- one or more lines: +print "Content-type: text/html" +# Blank line separating header from body: +print +# Body, in HTML format: +print "The Amazing SPAM Homepage!" +# etc... +\end{verbatim} + +The server will add some header lines of its own, but it won't touch +the output following the header. 
+ +The \code{cgi} module defines the following functions: + +\begin{funcdesc}{parse}{} +Read and parse the form submitted to the script and return a +dictionary containing the form's fields. This should be called at +most once per script invocation, as it may consume standard input (if +the form was submitted through a POST request). The keys in the +resulting dictionary are the field names used in the submission; the +values are {\em lists} of the field values (since field name may be +used multiple times in a single form). As a side effect, it sets +\code{environ['QUERY_STRING']} to the raw query string, if it isn't +already set. +\end{funcdesc} + +\begin{funcdesc}{print_environ_usage}{} +Print a piece of HTML listing the environment variables that may be +set by the CGI protocol. +This is mainly useful when learning about writing CGI scripts. +\end{funcdesc} + +\begin{funcdesc}{print_environ}{} +Print a piece of HTML text showing the entire contents of the shell +environment. This is mainly useful when debugging a CGI script. +\end{funcdesc} + +\begin{funcdesc}{print_form}{form} +Print a piece of HTML text showing the contents of the \var{form}. +This is mainly useful when debugging a CGI script. +\end{funcdesc} + +\begin{funcdesc}{escape}{string} +Convert special characters in \var{string} to HTML escapes. In +particular, ``\code{\&}'' is replaced with ``\code{\&amp;}'', +``\code{<}'' is replaced with ``\code{\&lt;}'', and ``\code{>}'' is +replaced with ``\code{\&gt;}''. This is useful when printing (almost) +arbitrary text in an HTML context. Note that for inclusion in quoted +tag attributes (e.g. \code{<A HREF="...">}), some additional +characters would have to be converted --- in particular the string +quote. There is currently no function that does this. +\end{funcdesc} + +The module defines the following classes. 
Since the base class +initializes itself by calling \code{parse()}, at most one instance of +at most one of these classes should be created per script invocation: + +\begin{funcdesc}{FormContentDict}{} +This class behaves like a (read-only) dictionary and has the same keys +and values as the dictionary returned by \code{parse()} (i.e. each +field name maps to a list of values). Additionally, it initializes +its data member \code{query_string} to the raw query sent from the +server. +\end{funcdesc} + +\begin{funcdesc}{SvFormContentDict}{} +This class, derived from \code{FormContentDict}, is a little more +user-friendly when you are expecting that each field name is only used +once in the form. When you access a particular field (using +\code{form[fieldname]}), it will return the string value of that item +if it is unique, or raise \code{IndexError} if the field was specified +more than once in the form. (If the field wasn't specified at all, +\code{KeyError} is raised.) To access fields that are specified +multiple times, use \code{form.getlist(fieldname)}. The +\code{values()} and \code{items()} methods return mixed lists -- +containing strings for singly-defined fields, and lists of strings for +multiply-defined fields. +\end{funcdesc} + +(It currently defines some more classes, but these are experimental +and/or obsolescent, and are thus not documented --- see the source for +more information.) + +The module defines the following variable: + +\begin{datadesc}{environ} +The shell environment, exactly as received from the http server. See +the CGI documentation for a description of the various fields. +\end{datadesc} diff --git a/Doc/libftplib.tex b/Doc/libftplib.tex new file mode 100644 index 0000000..105ccdf --- /dev/null +++ b/Doc/libftplib.tex @@ -0,0 +1,3 @@ +\section{Built-in module \sectcode{ftplib}} +\stmodindex{ftplib} +To be provided. 
diff --git a/Doc/libgopherlib.tex b/Doc/libgopherlib.tex new file mode 100644 index 0000000..9b81e37 --- /dev/null +++ b/Doc/libgopherlib.tex @@ -0,0 +1,3 @@ +\section{Built-in module \sectcode{gopherlib}} +\stmodindex{gopherlib} +To be provided. diff --git a/Doc/libhtmllib.tex b/Doc/libhtmllib.tex new file mode 100644 index 0000000..9ea10ee --- /dev/null +++ b/Doc/libhtmllib.tex @@ -0,0 +1,3 @@ +\section{Built-in module \sectcode{htmllib}} +\stmodindex{htmllib} +To be provided. diff --git a/Doc/libhttplib.tex b/Doc/libhttplib.tex new file mode 100644 index 0000000..a284faa --- /dev/null +++ b/Doc/libhttplib.tex @@ -0,0 +1,93 @@ +\section{Built-in module \sectcode{httplib}} +\stmodindex{httplib} +\index{HTTP} + +This module defines a class which implements the client side of the +HTTP protocol. It is normally not used directly --- the +\code{urllib} module uses it to handle URLs that use HTTP. +\stmodindex{urllib} + +The module defines one class, \code{HTTP}. An \code{HTTP} instance +represents one transaction with an HTTP server. It should be +instantiated passing it a host and optional port number. If no port +number is passed, the port is extracted from the host string if it has +the form \code{host:port}, else the default HTTP port (80) is used. +If no host is passed, no connection is made, and the \code{connect} +method should be used to connect to a server. + +Once an \code{HTTP} instance has been connected to an HTTP server, it +should be used as follows: + +\begin{enumerate} + +\item[1.] Make exactly one call to the \code{putrequest()} method. + +\item[2.] Make zero or more calls to the \code{putheader()} method. + +\item[3.] Call the \code{endheaders()} method (this can be omitted if +step 4 makes no calls). + +\item[4.] Optional calls to the \code{send()} method. + +\item[5.] Call the \code{getreply()} method. + +\item[6.] Call the \code{getfile()} method and read the data off the +file object that it returns. 
+ +\end{enumerate} + +\code{HTTP} instances have the following methods: + +\begin{funcdesc}{set_debuglevel}{level} +Set the debugging level (the amount of debugging output printed). +The default debug level is \code{0}, meaning no debugging output is +printed. +\end{funcdesc} + +\begin{funcdesc}{connect}{host\optional{\, port}} +Connect to the server given by \var{host} and \var{port}. See the +intro for the default port. This should be called directly only if +the instance was instantiated without passing a host. +\end{funcdesc} + +\begin{funcdesc}{send}{data} +Send data to the server. This should be used directly only after the +\code{endheaders()} method has been called and before +\code{getreply()} has been called. +\end{funcdesc} + +\begin{funcdesc}{putrequest}{request\, selector} +This should be the first call after the connection to the server has +been made. It sends a line to the server consisting of the +\var{request} string, the \var{selector} string, and the HTTP version +(\code{HTTP/1.0}). +\end{funcdesc} + +\begin{funcdesc}{putheader}{header\, argument\optional{\, ...}} +Send an RFC-822 style header to the server. It sends a line to the +server consisting of the header, a colon and a space, and the first +argument. If more arguments are given, continuation lines are sent, +each consisting of a tab and an argument. +\end{funcdesc} + +\begin{funcdesc}{endheaders}{} +Send a blank line to the server, signalling the end of the headers. +\end{funcdesc} + +\begin{funcdesc}{getreply}{} +Complete the request by shutting down the sending end of the socket, +read the reply from the server, and return a triple (\var{replycode}, +\var{message}, \var{headers}). Here \var{replycode} is the integer +reply code from the request (e.g. \code{200} if the request was +handled properly); \var{message} is the message string corresponding +to the reply code; and \var{headers} is an instance of the class +\code{rfc822.Message} containing the headers received from the server. 
+See the description of the \code{rfc822} module. +\stmodindex{rfc822} +\end{funcdesc} + +\begin{funcdesc}{getfile}{} +Return a file object from which the data returned by the server can be +read, using the \code{read()}, \code{readline()} or \code{readlines()} +methods. +\end{funcdesc} diff --git a/Doc/libmimetools.tex b/Doc/libmimetools.tex new file mode 100644 index 0000000..c32224b --- /dev/null +++ b/Doc/libmimetools.tex @@ -0,0 +1,3 @@ +\section{Built-in module \sectcode{mimetools}} +\stmodindex{mimetools} +To be provided. diff --git a/Doc/libnntplib.tex b/Doc/libnntplib.tex new file mode 100644 index 0000000..93e7ed1 --- /dev/null +++ b/Doc/libnntplib.tex @@ -0,0 +1,3 @@ +\section{Built-in module \sectcode{nntplib}} +\stmodindex{nntplib} +To be provided. diff --git a/Doc/librfc822.tex b/Doc/librfc822.tex new file mode 100644 index 0000000..43a5cea --- /dev/null +++ b/Doc/librfc822.tex @@ -0,0 +1,108 @@ +\section{Built-in module \sectcode{rfc822}} +\stmodindex{rfc822} + +This module defines a class, \code{Message}, which represents a +collection of ``email headers'' as defined by the Internet standard +RFC 822. It is used in various contexts, usually to read such headers +from a file. + +A \code{Message} instance is instantiated with an open file object as +parameter. Instantiation reads headers from the file up to a blank +line and stores them in the instance; after instantiation, the file is +positioned directly after the blank line that terminates the headers. + +Input lines as read from the file may either be terminated by CR-LF or +by a single linefeed; a terminating CR-LF is replaced by a single +linefeed before the line is stored. + +All header matching is done independent of upper or lower case; +e.g. \code{m['From']}, \code{m['from']} and \code{m['FROM']} all yield +the same result. + +A \code{Message} instance has the following methods: + +\begin{funcdesc}{rewindbody}{} +Seek to the start of the message body. 
This only works if the file +object is seekable. +\end{funcdesc} + +\begin{funcdesc}{getallmatchingheaders}{name} +Return a list of lines consisting of all headers whose header matches +\var{name}, if any. Each physical line, whether it is a continuation +line or not, is a separate list item. Return the empty list if no +header matches \var{name}. +\end{funcdesc} + +\begin{funcdesc}{getfirstmatchingheader}{name} +Return a list of lines comprising the first header matching +\var{name}, and its continuation line(s), if any. Return \code{None} +if there is no header matching \var{name}. +\end{funcdesc} + +\begin{funcdesc}{getrawheader}{name} +Return a single string consisting of the text after the colon in the +first header matching \var{name}. This includes leading whitespace, +the trailing linefeed, and internal linefeeds and whitespace if +any continuation line(s) were present. Return \code{None} if there is +no header matching \var{name}. +\end{funcdesc} + +\begin{funcdesc}{getheader}{name} +Like \code{getrawheader(\var{name})}, but strip leading and trailing +whitespace (but not internal whitespace). +\end{funcdesc} + +\begin{funcdesc}{getaddr}{name} +Return a pair (full name, email address) parsed from the string +returned by \code{getheader(\var{name})}. If no header matching +\var{name} exists, return \code{None, None}; otherwise both the full +name and the address are (possibly empty) strings. + +Example: if \code{m}'s first \code{From} header contains the string +\code{'guido@cwi.nl (Guido van Rossum)'}, then +\code{m.getaddr('From')} will yield the pair +\code{('Guido van Rossum', 'guido\@cwi.nl')}. +If the header contained +\code{'Guido van Rossum <guido\@cwi.nl>'} instead, it would yield the +exact same result. +\end{funcdesc} + +\begin{funcdesc}{getaddrlist}{name} +This is similar to \code{getaddr(\var{name})}, but parses a header +containing a list of email addresses (e.g. 
a \code{To} header) and +returns a list of (full name, email address) pairs (even if there was +only one address in the header). If there is no header matching +\var{name}, return an empty list. + +XXX The current version of this function is not really correct. It +yields bogus results if a full name contains a comma. +\end{funcdesc} + +\begin{funcdesc}{getdate}{name} +Retrieve a header using \code{getheader} and parse it into a 9-tuple +compatible with \code{time.mktime()}. If there is no header matching +\var{name}, or it is unparsable, return \code{None}. + +Date parsing appears to be a black art, and not all mailers adhere to +the standard. While it has been tested and found correct on a large +collection of email from many sources, it is still possible that this +function may occasionally yield an incorrect result. +\end{funcdesc} + +\code{Message} instances also support a read-only mapping interface. +In particular: \code{m[name]} is the same as \code{m.getheader(name)}; +and \code{len(m)}, \code{m.has_key(name)}, \code{m.keys()}, +\code{m.values()} and \code{m.items()} act as expected (and +consistently). + +Finally, \code{Message} instances have two public instance variables: + +\begin{datadesc}{headers} +A list containing the entire set of header lines, in the order in +which they were read. Each line contains a trailing newline. The +blank line terminating the headers is not contained in the list. +\end{datadesc} + +\begin{datadesc}{fp} +The file object passed at instantiation time. +\end{datadesc} diff --git a/Doc/libsgmllib.tex b/Doc/libsgmllib.tex new file mode 100644 index 0000000..03d9ba2 --- /dev/null +++ b/Doc/libsgmllib.tex @@ -0,0 +1,3 @@ +\section{Built-in module \sectcode{sgmllib}} +\stmodindex{sgmllib} +To be provided. 
diff --git a/Doc/libtypes2.tex b/Doc/libtypes2.tex new file mode 100644 index 0000000..7c51bb9 --- /dev/null +++ b/Doc/libtypes2.tex @@ -0,0 +1,120 @@ +\section{Built-in module \sectcode{types}} +\stmodindex{types} + +This module defines names for all object types that are used by the +standard Python interpreter (but not for the types defined by various +extension modules). It is safe to use \code{from types import *} --- +the module does not export any other names besides the ones listed +here. New names exported by future versions of this module will +all end in \code{Type}. + +Typical use is for functions that do different things depending on +their argument types, like the following: + +\begin{verbatim} +from types import * +def delete(list, item): + if type(item) is IntType: + del list[item] + else: + list.remove(item) +\end{verbatim} + +The module defines the following names: + +\begin{datadesc}{NoneType} +The type of \code{None}. +\end{datadesc} + +\begin{datadesc}{TypeType} +The type of type objects (such as returned by \code{type()}). +\end{datadesc} + +\begin{datadesc}{IntType} +The type of integers (e.g. \code{1}). +\end{datadesc} + +\begin{datadesc}{LongType} +The type of long integers (e.g. \code{1L}). +\end{datadesc} + +\begin{datadesc}{FloatType} +The type of floating point numbers (e.g. \code{1.0}). +\end{datadesc} + +\begin{datadesc}{StringType} +The type of character strings (e.g. \code{'Spam'}). +\end{datadesc} + +\begin{datadesc}{TupleType} +The type of tuples (e.g. \code{(1, 2, 3, 'Spam')}). +\end{datadesc} + +\begin{datadesc}{ListType} +The type of lists (e.g. \code{[0, 1, 2, 3]}). +\end{datadesc} + +\begin{datadesc}{DictType} +The type of dictionaries (e.g. \code{\{'Bacon': 1, 'Ham': 0\}}). +\end{datadesc} + +\begin{datadesc}{DictionaryType} +An alternative name for \code{DictType}. +\end{datadesc} + +\begin{datadesc}{FunctionType} +The type of user-defined functions and lambdas. 
+\end{datadesc} + +\begin{datadesc}{LambdaType} + An alternative name for \code{FunctionType}. +\end{datadesc} + +\begin{datadesc}{CodeType} +The type for code objects such as returned by \code{compile()}. +\end{datadesc} + +\begin{datadesc}{ClassType} +The type of user-defined classes. +\end{datadesc} + +\begin{datadesc}{InstanceType} +The type of instances of user-defined classes. +\end{datadesc} + +\begin{datadesc}{MethodType} +The type of methods of user-defined class instances. +\end{datadesc} + +\begin{datadesc}{UnboundMethodType} +An alternative name for \code{MethodType}. +\end{datadesc} + +\begin{datadesc}{BuiltinFunctionType} +The type of built-in functions like \code{len} or \code{sys.exit}. +\end{datadesc} + +\begin{datadesc}{BuiltinMethodType} +An alternative name for \code{BuiltinFunctionType}. +\end{datadesc} + +\begin{datadesc}{ModuleType} +The type of modules. +\end{datadesc} + +\begin{datadesc}{FileType} +The type of open file objects such as \code{sys.stdout}. +\end{datadesc} + +\begin{datadesc}{XRangeType} +The type of range objects returned by \code{xrange()}. +\end{datadesc} + +\begin{datadesc}{TracebackType} +The type of traceback objects such as found in \code{sys.exc_traceback}. +\end{datadesc} + +\begin{datadesc}{FrameType} +The type of frame objects such as found in \code{tb.tb_frame} if +\code{tb} is a traceback object. +\end{datadesc} diff --git a/Doc/liburlparse.tex b/Doc/liburlparse.tex new file mode 100644 index 0000000..8495437 --- /dev/null +++ b/Doc/liburlparse.tex @@ -0,0 +1,68 @@ +\section{Built-in module \sectcode{urlparse}} +\stmodindex{urlparse} +\index{WWW} +\indexii{World-Wide}{Web} +\index{URL} +\indexii{URL}{parsing} +\indexii{relative}{URL} + +This module defines a standard interface to break URL strings up in +components (addressing scheme, network location, path etc.), to combine +the components back into a URL string, and to convert a ``relative +URL'' to an absolute URL given a ``base URL''. 
+ +The module has been designed to match the current Internet draft on +Relative Uniform Resource Locators (and discovered a bug in an earlier +draft!). + +It defines the following functions: + +\begin{funcdesc}{urlparse}{urlstring\optional{\, +default_scheme\optional{\, allow_fragments}}} +Parse a URL into 6 components, returning a 6-tuple: (addressing +scheme, network location, path, parameters, query, fragment +identifier). This corresponds to the general structure of a URL: +\code{\var{scheme}://\var{netloc}/\var{path};\var{parameters}?\var{query}\#\var{fragment}}. +Each tuple item is a string, possibly empty. +The components are not broken up in smaller parts (e.g. the network +location is a single string), and \% escapes are not expanded. +The delimiters as shown above are not part of the tuple items, {\em +except} for a leading slash in the \var{path} component, which is +kept if present. + +Example: +\code{urlparse('http://www.cwi.nl:80/\%7eguido/Python.html')} +yields the tuple +\code{('http', 'www.cwi.nl:80', '/\%7eguido/Python.html', '', '', '')}. + +If the \var{default_scheme} argument is specified, it gives the +default addressing scheme, to be used only if the URL string does not +specify one. The default value for this argument is the empty string. + +If the \var{allow_fragments} argument is zero, fragment identifiers +are not allowed, even if the URL's addressing scheme normally does +support them. The default value for this argument is \code{1}. +\end{funcdesc} + +\begin{funcdesc}{urlunparse}{tuple} +Construct a URL string from a tuple as returned by \code{urlparse}. +This may result in a slightly different, but equivalent URL, if the +URL that was parsed originally had redundant delimiters, e.g. a ? with +an empty query (the draft states that these are equivalent). 
+\end{funcdesc} + +\begin{funcdesc}{urljoin}{base\, url\optional{\, allow_fragments}} +Construct a full (``absolute'') URL by combining a ``base URL'' +(\var{base}) with a ``relative URL'' (\var{url}). Informally, this +uses components of the base URL, in particular the addressing scheme, +the network location and (part of) the path, to provide missing +components in the relative URL. + +Example: +\code{urljoin('http://www.cwi.nl/\%7eguido/Python.html',} +\code{'FAQ.html')} yields the string +\code{'http://www.cwi.nl/\%7eguido/FAQ.html'}. + +The \var{allow_fragments} argument has the same meaning as for +\code{urlparse}. +\end{funcdesc} -- cgit v0.12