diff options
Diffstat (limited to 'Doc')
95 files changed, 6646 insertions, 624 deletions
@@ -195,6 +195,7 @@ Dan Wolfe Steven Work Thomas Wouters Ka-Ping Yee +Rory Yorke Moshe Zadka Milan Zamazal Cheng Zhang diff --git a/Doc/Makefile.deps b/Doc/Makefile.deps index 20c0688..11c6de0 100644 --- a/Doc/Makefile.deps +++ b/Doc/Makefile.deps @@ -155,6 +155,7 @@ LIBFILES= $(MANSTYLES) $(INDEXSTYLES) $(COMMONTEX) \ lib/required_2.py \ lib/libtempfile.tex \ lib/liberrno.tex \ + lib/libctypes.tex \ lib/libsomeos.tex \ lib/libsignal.tex \ lib/libsocket.tex \ @@ -179,6 +180,7 @@ LIBFILES= $(MANSTYLES) $(INDEXSTYLES) $(COMMONTEX) \ lib/libprofile.tex \ lib/libhotshot.tex \ lib/libtimeit.tex \ + lib/libtrace.tex \ lib/libcgi.tex \ lib/libcgitb.tex \ lib/liburllib.tex \ @@ -306,6 +308,7 @@ LIBFILES= $(MANSTYLES) $(INDEXSTYLES) $(COMMONTEX) \ lib/libgetpass.tex \ lib/libshutil.tex \ lib/librepr.tex \ + lib/libmsilib.tex \ lib/libmsvcrt.tex \ lib/libwinreg.tex \ lib/libwinsound.tex \ @@ -348,7 +351,8 @@ LIBFILES= $(MANSTYLES) $(INDEXSTYLES) $(COMMONTEX) \ lib/libturtle.tex \ lib/libtarfile.tex \ lib/libcsv.tex \ - lib/libcfgparser.tex + lib/libcfgparser.tex \ + lib/libsqlite3.tex # LaTeX source files for Macintosh Library Modules. MACFILES= $(HOWTOSTYLES) $(INDEXSTYLES) $(COMMONTEX) \ diff --git a/Doc/api/abstract.tex b/Doc/api/abstract.tex index f50ebc4..7c742a0 100644 --- a/Doc/api/abstract.tex +++ b/Doc/api/abstract.tex @@ -255,6 +255,8 @@ determination. \NULL, indicating that no arguments are provided. Returns the result of the call on success, or \NULL{} on failure. This is the equivalent of the Python expression \samp{\var{callable}(*\var{args})}. + Note that if you only pass \ctype{PyObject *} args, + \cfunction{PyObject_CallFunctionObjArgs} is a faster alternative. \end{cfuncdesc} @@ -268,6 +270,8 @@ determination. indicating that no arguments are provided. Returns the result of the call on success, or \NULL{} on failure. This is the equivalent of the Python expression \samp{\var{o}.\var{method}(\var{args})}. 
+ Note that if you only pass \ctype{PyObject *} args, + \cfunction{PyObject_CallMethodObjArgs} is a faster alternative. \end{cfuncdesc} @@ -624,7 +628,7 @@ determination. Returns the result of right shifting \var{o1} by \var{o2} on success, or \NULL{} on failure. The operation is done \emph{in-place} when \var{o1} supports it. This is the equivalent - of the Python statement \samp{\var{o1} >\code{>=} \var{o2}}. + of the Python statement \samp{\var{o1} >>= \var{o2}}. \end{cfuncdesc} diff --git a/Doc/api/concrete.tex b/Doc/api/concrete.tex index 1982bae..10247ab 100644 --- a/Doc/api/concrete.tex +++ b/Doc/api/concrete.tex @@ -245,7 +245,7 @@ booleans. The following macros are available, however. \end{csimplemacrodesc} \begin{cfuncdesc}{PyObject*}{PyBool_FromLong}{long v} - Return a new reference to \constant{Py_True} or \constant{Py_False} + Return a new reference to \constant{Py_True} or \constant{Py_False} depending on the truth value of \var{v}. \versionadded{2.3} \end{cfuncdesc} @@ -618,12 +618,24 @@ parameter and are called with a non-string parameter. exactly to the format characters in the \var{format} string. The following format characters are allowed: + % This should be exactly the same as the table in PyErr_Format. + % One should just refer to the other. + + % The descriptions for %zd and %zu are wrong, but the truth is complicated + % because not all compilers support the %z width modifier -- we fake it + % when necessary via interpolating PY_FORMAT_SIZE_T. + + % %u, %lu, %zu should have "new in Python 2.5" blurbs. 
+ \begin{tableiii}{l|l|l}{member}{Format Characters}{Type}{Comment} \lineiii{\%\%}{\emph{n/a}}{The literal \% character.} \lineiii{\%c}{int}{A single character, represented as an C int.} \lineiii{\%d}{int}{Exactly equivalent to \code{printf("\%d")}.} + \lineiii{\%u}{unsigned int}{Exactly equivalent to \code{printf("\%u")}.} \lineiii{\%ld}{long}{Exactly equivalent to \code{printf("\%ld")}.} - \lineiii{\%zd}{long}{Exactly equivalent to \code{printf("\%zd")}.} + \lineiii{\%lu}{unsigned long}{Exactly equivalent to \code{printf("\%lu")}.} + \lineiii{\%zd}{Py_ssize_t}{Exactly equivalent to \code{printf("\%zd")}.} + \lineiii{\%zu}{size_t}{Exactly equivalent to \code{printf("\%zu")}.} \lineiii{\%i}{int}{Exactly equivalent to \code{printf("\%i")}.} \lineiii{\%x}{int}{Exactly equivalent to \code{printf("\%x")}.} \lineiii{\%s}{char*}{A null-terminated C character array.} @@ -632,6 +644,10 @@ parameter and are called with a non-string parameter. guaranteed to start with the literal \code{0x} regardless of what the platform's \code{printf} yields.} \end{tableiii} + + An unrecognized format character causes all the rest of the format + string to be copied as-is to the result string, and any extra + arguments discarded. \end{cfuncdesc} \begin{cfuncdesc}{PyObject*}{PyString_FromFormatV}{const char *format, @@ -687,7 +703,7 @@ parameter and are called with a non-string parameter. \var{size})}. It must not be deallocated. If \var{string} is a Unicode object, this function computes the default encoding of \var{string} and operates on that. If \var{string} is not a string - object at all, \cfunction{PyString_AsStringAndSize()} returns + object at all, \cfunction{PyString_AsStringAndSize()} returns \code{-1} and raises \exception{TypeError}. \end{cfuncdesc} @@ -949,7 +965,7 @@ These APIs can be used for fast direct character conversions: \end{cfuncdesc} \begin{cfuncdesc}{double}{Py_UNICODE_TONUMERIC}{Py_UNICODE ch} - Return the character \var{ch} converted to a (positive) double. 
+ Return the character \var{ch} converted to a double. Return \code{-1.0} if this is not possible. This macro does not raise exceptions. \end{cfuncdesc} @@ -1393,7 +1409,7 @@ The following codec API is special in that maps Unicode to Unicode. The \var{mapping} table must map Unicode ordinal integers to Unicode ordinal integers or None (causing deletion of the character). - Mapping tables need only provide the method{__getitem__()} + Mapping tables need only provide the \method{__getitem__()} interface; dictionaries and sequences work well. Unmapped character ordinals (ones which cause a \exception{LookupError}) are left untouched and are copied as-is. @@ -1494,7 +1510,7 @@ They all return \NULL{} or \code{-1} if an exception occurs. Return 1 if \var{substr} matches \var{str}[\var{start}:\var{end}] at the given tail end (\var{direction} == -1 means to do a prefix match, \var{direction} == 1 a suffix match), 0 otherwise. - Return \code{-1} if an error occurred. + Return \code{-1} if an error occurred. \end{cfuncdesc} \begin{cfuncdesc}{Py_ssize_t}{PyUnicode_Find}{PyObject *str, @@ -3013,7 +3029,7 @@ Macros for the convenience of modules implementing the DB API: \subsection{Set Objects \label{setObjects}} -\sectionauthor{Raymond D. Hettinger}{python@rcn.com} +\sectionauthor{Raymond D. Hettinger}{python@rcn.com} \obindex{set} \obindex{frozenset} @@ -3022,8 +3038,8 @@ Macros for the convenience of modules implementing the DB API: This section details the public API for \class{set} and \class{frozenset} objects. 
Any functionality not listed below is best accessed using the either the abstract object protocol (including -\cfunction{PyObject_CallMethod()}, \cfunction{PyObject_RichCompareBool()}, -\cfunction{PyObject_Hash()}, \cfunction{PyObject_Repr()}, +\cfunction{PyObject_CallMethod()}, \cfunction{PyObject_RichCompareBool()}, +\cfunction{PyObject_Hash()}, \cfunction{PyObject_Repr()}, \cfunction{PyObject_IsTrue()}, \cfunction{PyObject_Print()}, and \cfunction{PyObject_GetIter()}) or the abstract number protocol (including @@ -3040,7 +3056,7 @@ or the abstract number protocol (including block of memory for medium and large sized sets (much like list storage). None of the fields of this structure should be considered public and are subject to change. All access should be done through the - documented API rather than by manipulating the values in the structure. + documented API rather than by manipulating the values in the structure. \end{ctypedesc} @@ -3059,7 +3075,7 @@ The following type check macros work on pointers to any Python object. Likewise, the constructor functions work with any iterable Python object. \begin{cfuncdesc}{int}{PyAnySet_Check}{PyObject *p} - Return true if \var{p} is a \class{set} object, a \class{frozenset} + Return true if \var{p} is a \class{set} object, a \class{frozenset} object, or an instance of a subtype. \end{cfuncdesc} @@ -3112,7 +3128,7 @@ The following functions and macros are available for instances of function does not automatically convert unhashable sets into temporary frozensets. Raise a \exception{TypeError} if the \var{key} is unhashable. Raise \exception{PyExc_SystemError} if \var{anyset} is not a \class{set}, - \class{frozenset}, or an instance of a subtype. + \class{frozenset}, or an instance of a subtype. \end{cfuncdesc} The following functions are available for instances of \class{set} or @@ -3134,7 +3150,7 @@ its subtypes but not for instances of \class{frozenset} or its subtypes. unhashable. 
Unlike the Python \method{discard()} method, this function does not automatically convert unhashable sets into temporary frozensets. Raise \exception{PyExc_SystemError} if \var{set} is not an instance - of \class{set} or its subtype. + of \class{set} or its subtype. \end{cfuncdesc} \begin{cfuncdesc}{PyObject*}{PySet_Pop}{PyObject *set} @@ -3142,7 +3158,7 @@ its subtypes but not for instances of \class{frozenset} or its subtypes. and removes the object from the \var{set}. Return \NULL{} on failure. Raise \exception{KeyError} if the set is empty. Raise a \exception{SystemError} if \var{set} is not an instance - of \class{set} or its subtype. + of \class{set} or its subtype. \end{cfuncdesc} \begin{cfuncdesc}{int}{PySet_Clear}{PyObject *set} diff --git a/Doc/api/exceptions.tex b/Doc/api/exceptions.tex index 62f713b..6dbe818 100644 --- a/Doc/api/exceptions.tex +++ b/Doc/api/exceptions.tex @@ -132,13 +132,32 @@ error indicator for each thread. codes, similar to \cfunction{printf()}. The \code{width.precision} before a format code is parsed, but the width part is ignored. - \begin{tableii}{c|l}{character}{Character}{Meaning} - \lineii{c}{Character, as an \ctype{int} parameter} - \lineii{d}{Number in decimal, as an \ctype{int} parameter} - \lineii{x}{Number in hexadecimal, as an \ctype{int} parameter} - \lineii{s}{A string, as a \ctype{char *} parameter} - \lineii{p}{A hex pointer, as a \ctype{void *} parameter} - \end{tableii} + % This should be exactly the same as the table in PyString_FromFormat. + % One should just refer to the other. + + % The descriptions for %zd and %zu are wrong, but the truth is complicated + % because not all compilers support the %z width modifier -- we fake it + % when necessary via interpolating PY_FORMAT_SIZE_T. + + % %u, %lu, %zu should have "new in Python 2.5" blurbs. 
+ + \begin{tableiii}{l|l|l}{member}{Format Characters}{Type}{Comment} + \lineiii{\%\%}{\emph{n/a}}{The literal \% character.} + \lineiii{\%c}{int}{A single character, represented as an C int.} + \lineiii{\%d}{int}{Exactly equivalent to \code{printf("\%d")}.} + \lineiii{\%u}{unsigned int}{Exactly equivalent to \code{printf("\%u")}.} + \lineiii{\%ld}{long}{Exactly equivalent to \code{printf("\%ld")}.} + \lineiii{\%lu}{unsigned long}{Exactly equivalent to \code{printf("\%lu")}.} + \lineiii{\%zd}{Py_ssize_t}{Exactly equivalent to \code{printf("\%zd")}.} + \lineiii{\%zu}{size_t}{Exactly equivalent to \code{printf("\%zu")}.} + \lineiii{\%i}{int}{Exactly equivalent to \code{printf("\%i")}.} + \lineiii{\%x}{int}{Exactly equivalent to \code{printf("\%x")}.} + \lineiii{\%s}{char*}{A null-terminated C character array.} + \lineiii{\%p}{void*}{The hex representation of a C pointer. + Mostly equivalent to \code{printf("\%p")} except that it is + guaranteed to start with the literal \code{0x} regardless of + what the platform's \code{printf} yields.} + \end{tableiii} An unrecognized format character causes all the rest of the format string to be copied as-is to the result string, and any extra @@ -272,8 +291,8 @@ error indicator for each thread. command line documentation. There is no C API for warning control. \end{cfuncdesc} -\begin{cfuncdesc}{int}{PyErr_WarnExplicit}{PyObject *category, - const char *message, const char *filename, int lineno, +\begin{cfuncdesc}{int}{PyErr_WarnExplicit}{PyObject *category, + const char *message, const char *filename, int lineno, const char *module, PyObject *registry} Issue a warning message with explicit control over all warning attributes. This is a straightforward wrapper around the Python @@ -314,12 +333,14 @@ error indicator for each thread. The \var{name} argument must be the name of the new exception, a C string of the form \code{module.class}. The \var{base} and \var{dict} arguments are normally \NULL. 
This creates a class - object derived from the root for all exceptions, the built-in name - \exception{Exception} (accessible in C as \cdata{PyExc_Exception}). + object derived from \exception{Exception} (accessible in C as + \cdata{PyExc_Exception}). + The \member{__module__} attribute of the new class is set to the first part (up to the last dot) of the \var{name} argument, and the class name is set to the last part (after the last dot). The - \var{base} argument can be used to specify an alternate base class. + \var{base} argument can be used to specify alternate base classes; + it can either be only one class or a tuple of classes. The \var{dict} argument can be used to specify a dictionary of class variables and methods. \end{cfuncdesc} @@ -399,5 +420,5 @@ are derived from \exception{BaseException}. \withsubitem{(built-in exception)}{\ttindex{BaseException}} String exceptions are still supported in the interpreter to allow -existing code to run unmodified, but this will also change in a future +existing code to run unmodified, but this will also change in a future release. diff --git a/Doc/api/newtypes.tex b/Doc/api/newtypes.tex index 2d758b0..28f77f7 100644 --- a/Doc/api/newtypes.tex +++ b/Doc/api/newtypes.tex @@ -883,8 +883,39 @@ The following three fields only exist if the \begin{cmemberdesc}{PyTypeObject}{traverseproc}{tp_traverse} An optional pointer to a traversal function for the garbage collector. This is only used if the \constant{Py_TPFLAGS_HAVE_GC} - flag bit is set. More information in section - \ref{supporting-cycle-detection} about garbage collection. + flag bit is set. More information about Python's garbage collection + scheme can be found in section \ref{supporting-cycle-detection}. + + The \member{tp_traverse} pointer is used by the garbage collector + to detect reference cycles. A typical implementation of a + \member{tp_traverse} function simply calls \cfunction{Py_VISIT()} on + each of the instance's members that are Python objects. 
For example, this + is the function \cfunction{local_traverse} from the \module{thread} extension + module: + + \begin{verbatim} + static int + local_traverse(localobject *self, visitproc visit, void *arg) + { + Py_VISIT(self->args); + Py_VISIT(self->kw); + Py_VISIT(self->dict); + return 0; + } + \end{verbatim} + + Note that \cfunction{Py_VISIT()} is called only on those members that can + participate in reference cycles. Although there is also a + \samp{self->key} member, it can only be \NULL{} or a Python string and + therefore cannot be part of a reference cycle. + + On the other hand, even if you know a member can never be part of a cycle, + as a debugging aid you may want to visit it anyway just so the + \module{gc} module's \function{get_referents()} function will include it. + + Note that \cfunction{Py_VISIT()} requires the \var{visit} and \var{arg} + parameters to \cfunction{local_traverse} to have these specific names; + don't name them just anything. This field is inherited by subtypes together with \member{tp_clear} and the \constant{Py_TPFLAGS_HAVE_GC} flag bit: the flag bit, @@ -896,8 +927,57 @@ The following three fields only exist if the \begin{cmemberdesc}{PyTypeObject}{inquiry}{tp_clear} An optional pointer to a clear function for the garbage collector. This is only used if the \constant{Py_TPFLAGS_HAVE_GC} flag bit is - set. More information in section - \ref{supporting-cycle-detection} about garbage collection. + set. + + The \member{tp_clear} member function is used to break reference + cycles in cyclic garbage detected by the garbage collector. Taken + together, all \member{tp_clear} functions in the system must combine to + break all reference cycles. This is subtle, and if in any doubt supply a + \member{tp_clear} function. For example, the tuple type does not + implement a \member{tp_clear} function, because it's possible to prove + that no reference cycle can be composed entirely of tuples. 
Therefore + the \member{tp_clear} functions of other types must be sufficient to + break any cycle containing a tuple. This isn't immediately obvious, and + there's rarely a good reason to avoid implementing \member{tp_clear}. + + Implementations of \member{tp_clear} should drop the instance's + references to those of its members that may be Python objects, and set + its pointers to those members to \NULL{}, as in the following example: + + \begin{verbatim} + static int + local_clear(localobject *self) + { + Py_CLEAR(self->key); + Py_CLEAR(self->args); + Py_CLEAR(self->kw); + Py_CLEAR(self->dict); + return 0; + } + \end{verbatim} + + The \cfunction{Py_CLEAR()} macro should be used, because clearing + references is delicate: the reference to the contained object must not be + decremented until after the pointer to the contained object is set to + \NULL{}. This is because decrementing the reference count may cause + the contained object to become trash, triggering a chain of reclamation + activity that may include invoking arbitrary Python code (due to + finalizers, or weakref callbacks, associated with the contained object). + If it's possible for such code to reference \var{self} again, it's + important that the pointer to the contained object be \NULL{} at that + time, so that \var{self} knows the contained object can no longer be + used. The \cfunction{Py_CLEAR()} macro performs the operations in a + safe order. + + Because the goal of \member{tp_clear} functions is to break reference + cycles, it's not necessary to clear contained objects like Python strings + or Python integers, which can't participate in reference cycles. + On the other hand, it may be convenient to clear all contained Python + objects, and write the type's \member{tp_dealloc} function to + invoke \member{tp_clear}. + + More information about Python's garbage collection + scheme can be found in section \ref{supporting-cycle-detection}. 
This field is inherited by subtypes together with \member{tp_clear} and the \constant{Py_TPFLAGS_HAVE_GC} flag bit: the flag bit, @@ -910,10 +990,10 @@ The following three fields only exist if the An optional pointer to the rich comparison function. The signature is the same as for \cfunction{PyObject_RichCompare()}. - The function should return \code{1} if the requested comparison - returns true, \code{0} if it returns false. It should return - \code{-1} and set an exception condition when an error occurred - during the comparison. + The function should return the result of the comparison (usually + \code{Py_True} or \code{Py_False}). If the comparison is undefined, + it must return \code{Py_NotImplemented}, if another error occurred + it must return \code{NULL} and set an exception condition. This field is inherited by subtypes together with \member{tp_compare} and \member{tp_hash}: a subtype inherits all diff --git a/Doc/api/refcounts.dat b/Doc/api/refcounts.dat index 7bba011..ab6d865 100644 --- a/Doc/api/refcounts.dat +++ b/Doc/api/refcounts.dat @@ -1147,7 +1147,7 @@ PySet_Discard:PyObject*:key:-1:no effect if key not found PySet_New:PyObject*::+1: PySet_New:PyObject*:iterable:0: -PySet_Pop:PyObject*::0:or returns NULL and raises KeyError if set is empty +PySet_Pop:PyObject*::+1:or returns NULL and raises KeyError if set is empty PySet_Pop:PyObject*:set:0: PySet_Size:int::: diff --git a/Doc/commontex/boilerplate.tex b/Doc/commontex/boilerplate.tex index 55a4184..b4c9f48 100644 --- a/Doc/commontex/boilerplate.tex +++ b/Doc/commontex/boilerplate.tex @@ -5,5 +5,5 @@ Email: \email{docs@python.org} } -\date{5th April 2006} % XXX update before final release! +\date{\today} % XXX update before final release! 
\input{patchlevel} % include Python version information diff --git a/Doc/dist/dist.tex b/Doc/dist/dist.tex index 3ba51d0..9970ec2 100644 --- a/Doc/dist/dist.tex +++ b/Doc/dist/dist.tex @@ -1760,16 +1760,16 @@ The \command{upload} command uses the username, password, and repository URL from the \file{\$HOME/.pypirc} file (see section~\ref{pypirc} for more on this file). -You can use the \programopt{--sign} option to tell \command{upload} to +You can use the \longprogramopt{sign} option to tell \command{upload} to sign each uploaded file using GPG (GNU Privacy Guard). The \program{gpg} program must be available for execution on the system \envvar{PATH}. You can also specify which key to use for signing -using the \programopt{--identity=\var{name}} option. +using the \longprogramopt{identity=\var{name}} option. Other \command{upload} options include -\programopt{--repository=\var{url}} (which lets you override the +\longprogramopt{repository=\var{url}} (which lets you override the repository setting from \file{\$HOME/.pypirc}), and -\programopt{--show-response} (which displays the full response text +\longprogramopt{show-response} (which displays the full response text from the PyPI server for help in debugging upload problems). \chapter{Examples} diff --git a/Doc/ext/windows.tex b/Doc/ext/windows.tex index a821094..ca18a1e 100644 --- a/Doc/ext/windows.tex +++ b/Doc/ext/windows.tex @@ -88,7 +88,7 @@ described here are distributed with the Python sources in the Once the Debug build has succeeded, bring up a DOS box, and change to the \file{example_nt\textbackslash Debug} directory. 
You should now be able to repeat the following session (\code{C>} is - the DOS prompt, \code{>\code{>}>} is the Python prompt; note that + the DOS prompt, \code{>>>} is the Python prompt; note that build information and various debug output from Python may not match this screen dump exactly): diff --git a/Doc/howto/unicode.rst b/Doc/howto/unicode.rst index 0946bdc..f92471a 100644 --- a/Doc/howto/unicode.rst +++ b/Doc/howto/unicode.rst @@ -158,7 +158,7 @@ that are more efficient and convenient. Encodings don't have to handle every possible Unicode character, and most encodings don't. For example, Python's default encoding is the 'ascii' encoding. The rules for converting a Unicode string into the -ASCII encoding are are simple; for each code point: +ASCII encoding are simple; for each code point: 1. If the code point is <128, each byte is the same as the value of the code point. @@ -721,7 +721,7 @@ Revision History and Acknowledgements Thanks to the following people who have noted errors or offered suggestions on this article: Nicholas Bastin, Marius Gedminas, Kent Johnson, Ken Krugler, -Marc-André Lemburg, Martin von Löwis. +Marc-André Lemburg, Martin von Löwis, Chad Whitacre. Version 1.0: posted August 5 2005. diff --git a/Doc/howto/urllib2.rst b/Doc/howto/urllib2.rst new file mode 100644 index 0000000..69ce508 --- /dev/null +++ b/Doc/howto/urllib2.rst @@ -0,0 +1,598 @@ +============================================== + HOWTO Fetch Internet Resources Using urllib2 +============================================== +---------------------------- + Fetching URLs With Python +---------------------------- + + +.. note:: + + There is an French translation of an earlier revision of this + HOWTO, available at `urllib2 - Le Manuel manquant + <http://www.voidspace/python/articles/urllib2_francais.shtml>`_. + +.. contents:: urllib2 Tutorial + + +Introduction +============ + +.. 
sidebar:: Related Articles + + You may also find useful the following article on fetching web + resources with Python : + + * `Basic Authentication <http://www.voidspace.org.uk/python/articles/authentication.shtml>`_ + + A tutorial on *Basic Authentication*, with examples in Python. + + This HOWTO is written by `Michael Foord + <http://www.voidspace.org.uk/python/index.shtml>`_. + +**urllib2** is a `Python <http://www.python.org>`_ module for fetching URLs +(Uniform Resource Locators). It offers a very simple interface, in the form of +the *urlopen* function. This is capable of fetching URLs using a variety +of different protocols. It also offers a slightly more complex +interface for handling common situations - like basic authentication, +cookies, proxies and so on. These are provided by objects called +handlers and openers. + +urllib2 supports fetching URLs for many "URL schemes" (identified by the string +before the ":" in URL - for example "ftp" is the URL scheme of +"ftp://python.org/") using their associated network protocols (e.g. FTP, HTTP). +This tutorial focuses on the most common case, HTTP. + +For straightforward situations *urlopen* is very easy to use. But as +soon as you encounter errors or non-trivial cases when opening HTTP +URLs, you will need some understanding of the HyperText Transfer +Protocol. The most comprehensive and authoritative reference to HTTP +is :RFC:`2616`. This is a technical document and not intended to be +easy to read. This HOWTO aims to illustrate using *urllib2*, with +enough detail about HTTP to help you through. It is not intended to +replace the `urllib2 docs <http://docs.python.org/lib/module-urllib2.html>`_ , +but is supplementary to them. 
+ + +Fetching URLs +============= + +The simplest way to use urllib2 is as follows : :: + + import urllib2 + response = urllib2.urlopen('http://python.org/') + html = response.read() + +Many uses of urllib2 will be that simple (note that instead of an +'http:' URL we could have used a URL starting with 'ftp:', 'file:', +etc.). However, it's the purpose of this tutorial to explain the more +complicated cases, concentrating on HTTP. + +HTTP is based on requests and responses - the client makes requests +and servers send responses. urllib2 mirrors this with a ``Request`` +object which represents the HTTP request you are making. In its +simplest form you create a Request object that specifies the URL you +want to fetch. Calling ``urlopen`` with this Request object returns a +response object for the URL requested. This response is a file-like +object, which means you can for example call .read() on the response : +:: + + import urllib2 + + req = urllib2.Request('http://www.voidspace.org.uk') + response = urllib2.urlopen(req) + the_page = response.read() + +Note that urllib2 makes use of the same Request interface to handle +all URL schemes. For example, you can make an FTP request like so: :: + + req = urllib2.Request('ftp://example.com/') + +In the case of HTTP, there are two extra things that Request objects +allow you to do: First, you can pass data to be sent to the server. +Second, you can pass extra information ("metadata") *about* the data +or about the request itself, to the server - this information is sent +as HTTP "headers". Let's look at each of these in turn. + +Data +---- + +Sometimes you want to send data to a URL (often the URL will refer to +a CGI (Common Gateway Interface) script [#]_ or other web +application). With HTTP, this is often done using what's known as a +**POST** request. This is often what your browser does when you submit +a HTML form that you filled in on the web. 
Not all POSTs have to come +from forms: you can use a POST to transmit arbitrary data to your own +application. In the common case of HTML forms, the data needs to be +encoded in a standard way, and then passed to the Request object as +the ``data`` argument. The encoding is done using a function from the +``urllib`` library *not* from ``urllib2``. :: + + import urllib + import urllib2 + + url = 'http://www.someserver.com/cgi-bin/register.cgi' + values = {'name' : 'Michael Foord', + 'location' : 'Northampton', + 'language' : 'Python' } + + data = urllib.urlencode(values) + req = urllib2.Request(url, data) + response = urllib2.urlopen(req) + the_page = response.read() + +Note that other encodings are sometimes required (e.g. for file upload +from HTML forms - see +`HTML Specification, Form Submission <http://www.w3.org/TR/REC-html40/interact/forms.html#h-17.13>`_ +for more details). + +If you do not pass the ``data`` argument, urllib2 uses a **GET** +request. One way in which GET and POST requests differ is that POST +requests often have "side-effects": they change the state of the +system in some way (for example by placing an order with the website +for a hundredweight of tinned spam to be delivered to your door). +Though the HTTP standard makes it clear that POSTs are intended to +*always* cause side-effects, and GET requests *never* to cause +side-effects, nothing prevents a GET request from having side-effects, +nor a POST request from having no side-effects. Data can also be +passed in an HTTP GET request by encoding it in the URL itself. + +This is done as follows:: + + >>> import urllib2 + >>> import urllib + >>> data = {} + >>> data['name'] = 'Somebody Here' + >>> data['location'] = 'Northampton' + >>> data['language'] = 'Python' + >>> url_values = urllib.urlencode(data) + >>> print url_values + name=Somebody+Here&language=Python&location=Northampton + >>> url = 'http://www.example.com/example.cgi' + >>> full_url = url + '?' 
+ url_values + >>> data = urllib2.urlopen(full_url) + +Notice that the full URL is created by adding a ``?`` to the URL, followed by +the encoded values. + +Headers +------- + +We'll discuss here one particular HTTP header, to illustrate how to +add headers to your HTTP request. + +Some websites [#]_ dislike being browsed by programs, or send +different versions to different browsers [#]_ . By default urllib2 +identifies itself as ``Python-urllib/x.y`` (where ``x`` and ``y`` are +the major and minor version numbers of the Python release, +e.g. ``Python-urllib/2.5``), which may confuse the site, or just plain +not work. The way a browser identifies itself is through the +``User-Agent`` header [#]_. When you create a Request object you can +pass a dictionary of headers in. The following example makes the same +request as above, but identifies itself as a version of Internet +Explorer [#]_. :: + + import urllib + import urllib2 + + url = 'http://www.someserver.com/cgi-bin/register.cgi' + user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)' + values = {'name' : 'Michael Foord', + 'location' : 'Northampton', + 'language' : 'Python' } + headers = { 'User-Agent' : user_agent } + + data = urllib.urlencode(values) + req = urllib2.Request(url, data, headers) + response = urllib2.urlopen(req) + the_page = response.read() + +The response also has two useful methods. See the section on `info and +geturl`_ which comes after we have a look at what happens when things +go wrong. + + +Handling Exceptions +=================== + +*urlopen* raises ``URLError`` when it cannot handle a response (though +as usual with Python APIs, builtin exceptions such as ValueError, +TypeError etc. may also be raised). + +``HTTPError`` is the subclass of ``URLError`` raised in the specific +case of HTTP URLs. + +URLError +-------- + +Often, URLError is raised because there is no network connection (no +route to the specified server), or the specified server doesn't exist. 
+In this case, the exception raised will have a 'reason' attribute, +which is a tuple containing an error code and a text error message. + +e.g. :: + + >>> req = urllib2.Request('http://www.pretend_server.org') + >>> try: urllib2.urlopen(req) + >>> except URLError, e: + >>> print e.reason + >>> + (4, 'getaddrinfo failed') + + +HTTPError +--------- + +Every HTTP response from the server contains a numeric "status +code". Sometimes the status code indicates that the server is unable +to fulfil the request. The default handlers will handle some of these +responses for you (for example, if the response is a "redirection" +that requests the client fetch the document from a different URL, +urllib2 will handle that for you). For those it can't handle, urlopen +will raise an ``HTTPError``. Typical errors include '404' (page not +found), '403' (request forbidden), and '401' (authentication +required). + +See section 10 of RFC 2616 for a reference on all the HTTP error +codes. + +The ``HTTPError`` instance raised will have an integer 'code' +attribute, which corresponds to the error sent by the server. + +Error Codes +~~~~~~~~~~~ + +Because the default handlers handle redirects (codes in the 300 +range), and codes in the 100-299 range indicate success, you will +usually only see error codes in the 400-599 range. + +``BaseHTTPServer.BaseHTTPRequestHandler.responses`` is a useful +dictionary of response codes that shows all the response codes used +by RFC 2616. The dictionary is reproduced here for convenience :: + + # Table mapping response codes to messages; entries have the + # form {code: (shortmessage, longmessage)}. 
+ responses = { + 100: ('Continue', 'Request received, please continue'), + 101: ('Switching Protocols', + 'Switching to new protocol; obey Upgrade header'), + + 200: ('OK', 'Request fulfilled, document follows'), + 201: ('Created', 'Document created, URL follows'), + 202: ('Accepted', + 'Request accepted, processing continues off-line'), + 203: ('Non-Authoritative Information', 'Request fulfilled from cache'), + 204: ('No Content', 'Request fulfilled, nothing follows'), + 205: ('Reset Content', 'Clear input form for further input.'), + 206: ('Partial Content', 'Partial content follows.'), + + 300: ('Multiple Choices', + 'Object has several resources -- see URI list'), + 301: ('Moved Permanently', 'Object moved permanently -- see URI list'), + 302: ('Found', 'Object moved temporarily -- see URI list'), + 303: ('See Other', 'Object moved -- see Method and URL list'), + 304: ('Not Modified', + 'Document has not changed since given time'), + 305: ('Use Proxy', + 'You must use proxy specified in Location to access this ' + 'resource.'), + 307: ('Temporary Redirect', + 'Object moved temporarily -- see URI list'), + + 400: ('Bad Request', + 'Bad request syntax or unsupported method'), + 401: ('Unauthorized', + 'No permission -- see authorization schemes'), + 402: ('Payment Required', + 'No payment -- see charging schemes'), + 403: ('Forbidden', + 'Request forbidden -- authorization will not help'), + 404: ('Not Found', 'Nothing matches the given URI'), + 405: ('Method Not Allowed', + 'Specified method is invalid for this server.'), + 406: ('Not Acceptable', 'URI not available in preferred format.'), + 407: ('Proxy Authentication Required', 'You must authenticate with ' + 'this proxy before proceeding.'), + 408: ('Request Timeout', 'Request timed out; try again later.'), + 409: ('Conflict', 'Request conflict.'), + 410: ('Gone', + 'URI no longer exists and has been permanently removed.'), + 411: ('Length Required', 'Client must specify Content-Length.'), + 412: 
('Precondition Failed', 'Precondition in headers is false.'), + 413: ('Request Entity Too Large', 'Entity is too large.'), + 414: ('Request-URI Too Long', 'URI is too long.'), + 415: ('Unsupported Media Type', 'Entity body in unsupported format.'), + 416: ('Requested Range Not Satisfiable', + 'Cannot satisfy request range.'), + 417: ('Expectation Failed', + 'Expect condition could not be satisfied.'), + + 500: ('Internal Server Error', 'Server got itself in trouble'), + 501: ('Not Implemented', + 'Server does not support this operation'), + 502: ('Bad Gateway', 'Invalid responses from another server/proxy.'), + 503: ('Service Unavailable', + 'The server cannot process the request due to a high load'), + 504: ('Gateway Timeout', + 'The gateway server did not receive a timely response'), + 505: ('HTTP Version Not Supported', 'Cannot fulfill request.'), + } + +When an error is raised the server responds by returning an HTTP error +code *and* an error page. You can use the ``HTTPError`` instance as a +response on the page returned. This means that as well as the code +attribute, it also has read, geturl, and info, methods. :: + + >>> req = urllib2.Request('http://www.python.org/fish.html') + >>> try: + >>> urllib2.urlopen(req) + >>> except URLError, e: + >>> print e.code + >>> print e.read() + >>> + 404 + <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" + "http://www.w3.org/TR/html4/loose.dtd"> + <?xml-stylesheet href="./css/ht2html.css" + type="text/css"?> + <html><head><title>Error 404: File Not Found</title> + ...... etc... + +Wrapping it Up +-------------- + +So if you want to be prepared for ``HTTPError`` *or* ``URLError`` +there are two basic approaches. I prefer the second approach. + +Number 1 +~~~~~~~~ + +:: + + + from urllib2 import Request, urlopen, URLError, HTTPError + req = Request(someurl) + try: + response = urlopen(req) + except HTTPError, e: + print 'The server couldn\'t fulfill the request.' 
+ print 'Error code: ', e.code + except URLError, e: + print 'We failed to reach a server.' + print 'Reason: ', e.reason + else: + # everything is fine + + +.. note:: + + The ``except HTTPError`` *must* come first, otherwise ``except URLError`` + will *also* catch an ``HTTPError``. + +Number 2 +~~~~~~~~ + +:: + + from urllib2 import Request, urlopen, URLError + req = Request(someurl) + try: + response = urlopen(req) + except URLError, e: + if hasattr(e, 'reason'): + print 'We failed to reach a server.' + print 'Reason: ', e.reason + elif hasattr(e, 'code'): + print 'The server couldn\'t fulfill the request.' + print 'Error code: ', e.code + else: + # everything is fine + + +info and geturl +=============== + +The response returned by urlopen (or the ``HTTPError`` instance) has +two useful methods ``info`` and ``geturl``. + +**geturl** - this returns the real URL of the page fetched. This is +useful because ``urlopen`` (or the opener object used) may have +followed a redirect. The URL of the page fetched may not be the same +as the URL requested. + +**info** - this returns a dictionary-like object that describes the +page fetched, particularly the headers sent by the server. It is +currently an ``httplib.HTTPMessage`` instance. + +Typical headers include 'Content-length', 'Content-type', and so +on. See the +`Quick Reference to HTTP Headers <http://www.cs.tut.fi/~jkorpela/http.html>`_ +for a useful listing of HTTP headers with brief explanations of their meaning +and use. + + +Openers and Handlers +==================== + +When you fetch a URL you use an opener (an instance of the perhaps +confusingly-named ``urllib2.OpenerDirector``). Normally we have been using +the default opener - via ``urlopen`` - but you can create custom +openers. Openers use handlers. All the "heavy lifting" is done by the +handlers. 
Each handler knows how to open URLs for a particular URL +scheme (http, ftp, etc.), or how to handle an aspect of URL opening, +for example HTTP redirections or HTTP cookies. + +You will want to create openers if you want to fetch URLs with +specific handlers installed, for example to get an opener that handles +cookies, or to get an opener that does not handle redirections. + +To create an opener, instantiate an OpenerDirector, and then call +.add_handler(some_handler_instance) repeatedly. + +Alternatively, you can use ``build_opener``, which is a convenience +function for creating opener objects with a single function call. +``build_opener`` adds several handlers by default, but provides a +quick way to add more and/or override the default handlers. + +Other sorts of handlers you might want to use can handle proxies, +authentication, and other common but slightly specialised +situations. + +``install_opener`` can be used to make an ``opener`` object the +(global) default opener. This means that calls to ``urlopen`` will use +the opener you have installed. + +Opener objects have an ``open`` method, which can be called directly +to fetch urls in the same way as the ``urlopen`` function: there's no +need to call ``install_opener``, except as a convenience. + + +Basic Authentication +==================== + +To illustrate creating and installing a handler we will use the +``HTTPBasicAuthHandler``. For a more detailed discussion of this +subject - including an explanation of how Basic Authentication works - +see the `Basic Authentication Tutorial <http://www.voidspace.org.uk/python/articles/authentication.shtml>`_. + +When authentication is required, the server sends a header (as well as +the 401 error code) requesting authentication. This specifies the +authentication scheme and a 'realm'. The header looks like : +``Www-authenticate: SCHEME realm="REALM"``. + +e.g. 
:: + + Www-authenticate: Basic realm="cPanel Users" + + +The client should then retry the request with the appropriate name and +password for the realm included as a header in the request. This is +'basic authentication'. In order to simplify this process we can +create an instance of ``HTTPBasicAuthHandler`` and an opener to use +this handler. + +The ``HTTPBasicAuthHandler`` uses an object called a password manager +to handle the mapping of URLs and realms to passwords and +usernames. If you know what the realm is (from the authentication +header sent by the server), then you can use a +``HTTPPasswordMgr``. Frequently one doesn't care what the realm is. In +that case, it is convenient to use +``HTTPPasswordMgrWithDefaultRealm``. This allows you to specify a +default username and password for a URL. This will be supplied in the +absence of you providing an alternative combination for a specific +realm. We indicate this by providing ``None`` as the realm argument to +the ``add_password`` method. + +The top-level URL is the first URL that requires authentication. URLs +"deeper" than the URL you pass to .add_password() will also match. :: + + # create a password manager + password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm() + + # Add the username and password. + # If we knew the realm, we could use it instead of ``None``. + top_level_url = "http://example.com/foo/" + password_mgr.add_password(None, top_level_url, username, password) + + handler = urllib2.HTTPBasicAuthHandler(password_mgr) + + # create "opener" (OpenerDirector instance) + opener = urllib2.build_opener(handler) + + # use the opener to fetch a URL + opener.open(a_url) + + # Install the opener. + # Now all calls to urllib2.urlopen use our opener. + urllib2.install_opener(opener) + +.. note:: + + In the above example we only supplied our ``HTTPBasicAuthHandler`` + to ``build_opener``. 
By default openers have the handlers for + normal situations - ``ProxyHandler``, ``UnknownHandler``, + ``HTTPHandler``, ``HTTPDefaultErrorHandler``, + ``HTTPRedirectHandler``, ``FTPHandler``, ``FileHandler``, + ``HTTPErrorProcessor``. + +top_level_url is in fact *either* a full URL (including the 'http:' +scheme component and the hostname and optionally the port number) +e.g. "http://example.com/" *or* an "authority" (i.e. the hostname, +optionally including the port number) e.g. "example.com" or +"example.com:8080" (the latter example includes a port number). The +authority, if present, must NOT contain the "userinfo" component - for +example "joe:password@example.com" is not correct. + + +Proxies +======= + +**urllib2** will auto-detect your proxy settings and use those. This +is through the ``ProxyHandler`` which is part of the normal handler +chain. Normally that's a good thing, but there are occasions when it +may not be helpful [#]_. One way to disable it is to set up our own +``ProxyHandler``, with no proxies defined. This is done using similar +steps to setting up a `Basic Authentication`_ handler : :: + + >>> proxy_support = urllib2.ProxyHandler({}) + >>> opener = urllib2.build_opener(proxy_support) + >>> urllib2.install_opener(opener) + +.. note:: + + Currently ``urllib2`` *does not* support fetching of ``https`` + locations through a proxy. This can be a problem. + +Sockets and Layers +================== + +The Python support for fetching resources from the web is +layered. urllib2 uses the httplib library, which in turn uses the +socket library. + +As of Python 2.3 you can specify how long a socket should wait for a +response before timing out. This can be useful in applications which +have to fetch web pages. By default the socket module has *no timeout* +and can hang. Currently, the socket timeout is not exposed at the +httplib or urllib2 levels. 
However, you can set the default timeout +globally for all sockets using : :: + + import socket + import urllib2 + + # timeout in seconds + timeout = 10 + socket.setdefaulttimeout(timeout) + + # this call to urllib2.urlopen now uses the default timeout + # we have set in the socket module + req = urllib2.Request('http://www.voidspace.org.uk') + response = urllib2.urlopen(req) + + +------- + + +Footnotes +========= + +This document was reviewed and revised by John Lee. + +.. [#] For an introduction to the CGI protocol see + `Writing Web Applications in Python <http://www.pyzine.com/Issue008/Section_Articles/article_CGIOne.html>`_. +.. [#] Like Google for example. The *proper* way to use google from a program + is to use `PyGoogle <http://pygoogle.sourceforge.net>`_ of course. See + `Voidspace Google <http://www.voidspace.org.uk/python/recipebook.shtml#google>`_ + for some examples of using the Google API. +.. [#] Browser sniffing is a very bad practise for website design - building + sites using web standards is much more sensible. Unfortunately a lot of + sites still send different versions to different browsers. +.. [#] The user agent for MSIE 6 is + *'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)'* +.. [#] For details of more HTTP request headers, see + `Quick Reference to HTTP Headers`_. +.. [#] In my case I have to use a proxy to access the internet at work. If you + attempt to fetch *localhost* URLs through this proxy it blocks them. IE + is set to use the proxy, which urllib2 picks up on. In order to test + scripts with a localhost server, I have to prevent urllib2 from using + the proxy. diff --git a/Doc/inst/inst.tex b/Doc/inst/inst.tex index 4961a1a..676f8ae 100644 --- a/Doc/inst/inst.tex +++ b/Doc/inst/inst.tex @@ -726,8 +726,8 @@ There are two environment variables that can modify \code{sys.path}. \envvar{PYTHONHOME} sets an alternate value for the prefix of the Python installation. 
For example, if \envvar{PYTHONHOME} is set to \samp{/www/python}, the search path will be set to \code{['', -'/www/python/lib/python2.2/', '/www/python/lib/python2.3/plat-linux2', -...]}. +'/www/python/lib/python\shortversion/', +'/www/python/lib/python\shortversion/plat-linux2', ...]}. The \envvar{PYTHONPATH} variable can be set to a list of paths that will be added to the beginning of \code{sys.path}. For example, if @@ -981,15 +981,15 @@ different from the format used by the Python version you can download from the Python or ActiveState Web site. (Python is built with Microsoft Visual \Cpp, which uses COFF as the object file format.) For this reason you have to convert Python's library -\file{python24.lib} into the Borland format. You can do this as +\file{python25.lib} into the Borland format. You can do this as follows: \begin{verbatim} -coff2omf python24.lib python24_bcpp.lib +coff2omf python25.lib python25_bcpp.lib \end{verbatim} The \file{coff2omf} program comes with the Borland compiler. The file -\file{python24.lib} is in the \file{Libs} directory of your Python +\file{python25.lib} is in the \file{Libs} directory of your Python installation. If your extension uses other libraries (zlib,...) you have to convert them too. @@ -1053,17 +1053,23 @@ First you have to create a list of symbols which the Python DLL exports. PExports 0.42h there.) \begin{verbatim} -pexports python24.dll >python24.def +pexports python25.dll >python25.def \end{verbatim} +The location of an installed \file{python25.dll} will depend on the +installation options and the version and language of Windows. In a +``just for me'' installation, it will appear in the root of the +installation directory. In a shared installation, it will be located +in the system directory. + Then you can create from these information an import library for gcc. 
\begin{verbatim} -dlltool --dllname python24.dll --def python24.def --output-lib libpython24.a +/cygwin/bin/dlltool --dllname python25.dll --def python25.def --output-lib libpython25.a \end{verbatim} The resulting library has to be placed in the same directory as -\file{python24.lib}. (Should be the \file{libs} directory under your +\file{python25.lib}. (Should be the \file{libs} directory under your Python installation directory.) If your extension uses other libraries (zlib,...) you might diff --git a/Doc/lib/lib.tex b/Doc/lib/lib.tex index eac35de..cf657c3 100644 --- a/Doc/lib/lib.tex +++ b/Doc/lib/lib.tex @@ -224,6 +224,7 @@ and how to embed it in other applications. \input{libdbhash} \input{libbsddb} \input{libdumbdbm} +\input{libsqlite3} % ============= @@ -243,6 +244,8 @@ and how to embed it in other applications. \input{libcursespanel} \input{libplatform} \input{liberrno} +\input{libctypes} +\input{libctypesref} \input{libsomeos} % Optional Operating System Services \input{libselect} @@ -359,7 +362,7 @@ and how to embed it in other applications. \input{libprofile} % The Python Profiler \input{libhotshot} % unmaintained C profiler \input{libtimeit} - +\input{libtrace} % ============= % PYTHON ENGINE @@ -444,6 +447,7 @@ and how to embed it in other applications. \input{libsunaudio} \input{windows} % MS Windows ONLY +\input{libmsilib} \input{libmsvcrt} \input{libwinreg} \input{libwinsound} diff --git a/Doc/lib/libcodecs.tex b/Doc/lib/libcodecs.tex index 6e0bc8d..05c0375 100644 --- a/Doc/lib/libcodecs.tex +++ b/Doc/lib/libcodecs.tex @@ -161,7 +161,7 @@ directly. \end{funcdesc} \begin{funcdesc}{lookup_error}{name} -Return the error handler previously register under the name \var{name}. +Return the error handler previously registered under the name \var{name}. Raises a \exception{LookupError} in case the handler cannot be found. \end{funcdesc} @@ -366,7 +366,7 @@ steps. 
It defines the following methods which every incremental encoder must define in order to be compatible with the Python codec registry. \begin{classdesc}{IncrementalEncoder}{\optional{errors}} - Constructor for a \class{IncrementalEncoder} instance. + Constructor for an \class{IncrementalEncoder} instance. All incremental encoders must provide this constructor interface. They are free to add additional keyword arguments, but only the ones defined @@ -413,7 +413,7 @@ steps. It defines the following methods which every incremental decoder must define in order to be compatible with the Python codec registry. \begin{classdesc}{IncrementalDecoder}{\optional{errors}} - Constructor for a \class{IncrementalDecoder} instance. + Constructor for an \class{IncrementalDecoder} instance. All incremental decoders must provide this constructor interface. They are free to add additional keyword arguments, but only the ones defined diff --git a/Doc/lib/libcodeop.tex b/Doc/lib/libcodeop.tex index 7d6153e..6972b6f 100644 --- a/Doc/lib/libcodeop.tex +++ b/Doc/lib/libcodeop.tex @@ -19,7 +19,7 @@ There are two parts to this job: \begin{enumerate} \item Being able to tell if a line of input completes a Python statement: in short, telling whether to print - `\code{>\code{>}>~}' or `\code{...~}' next. + `\code{>>>~}' or `\code{...~}' next. \item Remembering which future statements the user has entered, so subsequent input can be compiled with these in effect. \end{enumerate} diff --git a/Doc/lib/libcollections.tex b/Doc/lib/libcollections.tex index d9bfa39..3e56a3e 100644 --- a/Doc/lib/libcollections.tex +++ b/Doc/lib/libcollections.tex @@ -59,12 +59,12 @@ Deque objects support the following methods: \begin{methoddesc}{pop}{} Remove and return an element from the right side of the deque. - If no elements are present, raises a \exception{IndexError}. + If no elements are present, raises an \exception{IndexError}. 
\end{methoddesc} \begin{methoddesc}{popleft}{} Remove and return an element from the left side of the deque. - If no elements are present, raises a \exception{IndexError}. + If no elements are present, raises an \exception{IndexError}. \end{methoddesc} \begin{methoddesc}{remove}{value} diff --git a/Doc/lib/libcontextlib.tex b/Doc/lib/libcontextlib.tex index 46f9cdd..72bf537 100644 --- a/Doc/lib/libcontextlib.tex +++ b/Doc/lib/libcontextlib.tex @@ -12,11 +12,13 @@ This module provides utilities for common tasks involving the Functions provided: \begin{funcdesc}{contextmanager}{func} -This function is a decorator that can be used to define context managers -for use with the \keyword{with} statement, without needing to create a -class or separate \method{__enter__()} and \method{__exit__()} methods. +This function is a decorator that can be used to define a factory +function for \keyword{with} statement context managers, without +needing to create a class or separate \method{__enter__()} and +\method{__exit__()} methods. -A simple example: +A simple example (this is not recommended as a real way of +generating HTML!): \begin{verbatim} from __future__ import with_statement @@ -36,9 +38,10 @@ foo </h1> \end{verbatim} -When called, the decorated function must return a generator-iterator. -This iterator must yield exactly one value, which will be bound to the -targets in the \keyword{with} statement's \keyword{as} clause, if any. +The function being decorated must return a generator-iterator when +called. This iterator must yield exactly one value, which will be +bound to the targets in the \keyword{with} statement's \keyword{as} +clause, if any. At the point where the generator yields, the block nested in the \keyword{with} statement is executed. The generator is then resumed @@ -46,37 +49,16 @@ after the block is exited. If an unhandled exception occurs in the block, it is reraised inside the generator at the point where the yield occurred. 
Thus, you can use a \keyword{try}...\keyword{except}...\keyword{finally} statement to trap -the error (if any), or ensure that some cleanup takes place. - -Note that you can use \code{@contextmanager} to define a context -manager's \method{__context__} method. This is usually more convenient -than creating another class just to serve as a context. For example: - -\begin{verbatim} -from __future__ import with_statement -from contextlib import contextmanager - -class Tag: - def __init__(self, name): - self.name = name - - @contextmanager - def __context__(self): - print "<%s>" % self.name - yield self - print "</%s>" % self.name - -h1 = Tag("h1") - ->>> with h1 as me: -... print "hello from", me -<h1> -hello from <__main__.Tag instance at 0x402ce8ec> -</h1> -\end{verbatim} +the error (if any), or ensure that some cleanup takes place. If an +exception is trapped merely in order to log it or to perform some +action (rather than to suppress it entirely), the generator must +reraise that exception. Otherwise the generator context manager will +indicate to the \keyword{with} statement that the exception has been +handled, and execution will resume with the statement immediately +following the \keyword{with} statement. \end{funcdesc} -\begin{funcdesc}{nested}{ctx1\optional{, ctx2\optional{, ...}}} +\begin{funcdesc}{nested}{mgr1\optional{, mgr2\optional{, ...}}} Combine multiple context managers into a single nested context manager. Code like this: @@ -97,18 +79,22 @@ with A as X: do_something() \end{verbatim} -Note that if one of the nested contexts' \method{__exit__()} method -raises an exception, any previous exception state will be lost; the new -exception will be passed to the outer contexts' \method{__exit__()} -method(s), if any. 
In general, \method{__exit__()} methods should avoid -raising exceptions, and in particular they should not re-raise a +Note that if the \method{__exit__()} method of one of the nested +context managers indicates an exception should be suppressed, no +exception information will be passed to any remaining outer context +managers. Similarly, if the \method{__exit__()} method of one of the +nested managers raises an exception, any previous exception state will +be lost; the new exception will be passed to the +\method{__exit__()} methods of any remaining outer context managers. +In general, \method{__exit__()} methods should avoid raising +exceptions, and in particular they should not re-raise a passed-in exception. \end{funcdesc} \label{context-closing} \begin{funcdesc}{closing}{thing} -Return a context manager that closes \var{thing} upon completion of the -block. This is basically equivalent to: +Return a context manager that closes \var{thing} upon completion of +the block. This is basically equivalent to: \begin{verbatim} from contextlib import contextmanager @@ -127,14 +113,14 @@ from __future__ import with_statement from contextlib import closing import codecs -with closing(codecs.open("foo", encoding="utf8")) as f: - for line in f: - print line.encode("latin1") +with closing(urllib.urlopen('http://www.python.org')) as page: + for line in page: + print line \end{verbatim} -without needing to explicitly close \code{f}. Even if an error occurs, -\code{f.close()} will be called when the \keyword{with} block is exited. - +without needing to explicitly close \code{page}. Even if an error +occurs, \code{page.close()} will be called when the \keyword{with} +block is exited. 
\end{funcdesc} \begin{seealso} diff --git a/Doc/lib/libctypes.tex b/Doc/lib/libctypes.tex new file mode 100755 index 0000000..dc37749 --- /dev/null +++ b/Doc/lib/libctypes.tex @@ -0,0 +1,1226 @@ +\newlength{\locallinewidth} +\setlength{\locallinewidth}{\linewidth} +\section{\module{ctypes} --- A foreign function library for Python.} +\declaremodule{standard}{ctypes} +\moduleauthor{Thomas Heller}{theller@python.net} +\modulesynopsis{A foreign function library for Python.} +\versionadded{2.5} + +\code{ctypes} is a foreign function library for Python. + + +\subsection{ctypes tutorial\label{ctypes-ctypes-tutorial}} + +This tutorial describes version 0.9.9 of \code{ctypes}. + +Note: The code samples in this tutorial use \code{doctest} to make sure +that they actually work. Since some code samples behave differently +under Linux, Windows, or Mac OS X, they contain doctest directives in +comments. + +Note: Quite a few code samples reference the ctypes \class{c{\_}int} type. +This type is an alias for the \class{c{\_}long} type on 32-bit systems. So, +you should not be confused if \class{c{\_}long} is printed if you would +expect \class{c{\_}int} - they are actually the same type. + + +\subsubsection{Loading dynamic link libraries\label{ctypes-loading-dynamic-link-libraries}} + +\code{ctypes} exports the \var{cdll}, and on Windows also \var{windll} and +\var{oledll} objects to load dynamic link libraries. + +You load libraries by accessing them as attributes of these objects. +\var{cdll} loads libraries which export functions using the standard +\code{cdecl} calling convention, while \var{windll} libraries call +functions using the \code{stdcall} calling convention. \var{oledll} also +uses the \code{stdcall} calling convention, and assumes the functions +return a Windows \class{HRESULT} error code. The error code is used to +automatically raise \class{WindowsError} Python exceptions when the +function call fails. 
+ +Here are some examples for Windows, note that \code{msvcrt} is the MS +standard C library containing most standard C functions, and uses the +cdecl calling convention: +\begin{verbatim} +>>> from ctypes import * +>>> print windll.kernel32 # doctest: +WINDOWS +<WinDLL 'kernel32', handle ... at ...> +>>> print cdll.msvcrt # doctest: +WINDOWS +<CDLL 'msvcrt', handle ... at ...> +>>> libc = cdll.msvcrt # doctest: +WINDOWS +>>> +\end{verbatim} + +Windows appends the usual '.dll' file suffix automatically. + +On Linux, it is required to specify the filename \emph{including} the +extension to load a library, so attribute access does not work. +Either the \method{LoadLibrary} method of the dll loaders should be used, +or you should load the library by creating an instance of CDLL by +calling the constructor: +\begin{verbatim} +>>> cdll.LoadLibrary("libc.so.6") # doctest: +LINUX +<CDLL 'libc.so.6', handle ... at ...> +>>> libc = CDLL("libc.so.6") # doctest: +LINUX +>>> libc # doctest: +LINUX +<CDLL 'libc.so.6', handle ... at ...> +>>> +\end{verbatim} + +XXX Add section for Mac OS X. + + +\subsubsection{Accessing functions from loaded dlls\label{ctypes-accessing-functions-from-loaded-dlls}} + +Functions are accessed as attributes of dll objects: +\begin{verbatim} +>>> from ctypes import * +>>> libc.printf +<_FuncPtr object at 0x...> +>>> print windll.kernel32.GetModuleHandleA # doctest: +WINDOWS +<_FuncPtr object at 0x...> +>>> print windll.kernel32.MyOwnFunction # doctest: +WINDOWS +Traceback (most recent call last): + File "<stdin>", line 1, in ? + File "ctypes.py", line 239, in __getattr__ + func = _StdcallFuncPtr(name, self) +AttributeError: function 'MyOwnFunction' not found +>>> +\end{verbatim} + +Note that win32 system dlls like \code{kernel32} and \code{user32} often +export ANSI as well as UNICODE versions of a function. 
The UNICODE +version is exported with a \code{W} appended to the name, while the ANSI +version is exported with an \code{A} appended to the name. The win32 +\code{GetModuleHandle} function, which returns a \emph{module handle} for a +given module name, has the following C prototypes, and a macro is used +to expose one of them as \code{GetModuleHandle} depending on whether +UNICODE is defined or not: +\begin{verbatim} +/* ANSI version */ +HMODULE GetModuleHandleA(LPCSTR lpModuleName); +/* UNICODE version */ +HMODULE GetModuleHandleW(LPCWSTR lpModuleName); +\end{verbatim} + +\var{windll} does not try to select one of them by magic, you must +access the version you need by specifying \code{GetModuleHandleA} or +\code{GetModuleHandleW} explicitly, and then call it with normal strings +or unicode strings respectively. + +Sometimes, dlls export functions with names which aren't valid Python +identifiers, like \code{"??2@YAPAXI@Z"}. In this case you have to use +\code{getattr} to retrieve the function: +\begin{verbatim} +>>> getattr(cdll.msvcrt, "??2@YAPAXI@Z") # doctest: +WINDOWS +<_FuncPtr object at 0x...> +>>> +\end{verbatim} + +On Windows, some dlls export functions not by name but by ordinal. +These functions can be accessed by indexing the dll object with the +ordinal number: +\begin{verbatim} +>>> cdll.kernel32[1] # doctest: +WINDOWS +<_FuncPtr object at 0x...> +>>> cdll.kernel32[0] # doctest: +WINDOWS +Traceback (most recent call last): + File "<stdin>", line 1, in ? + File "ctypes.py", line 310, in __getitem__ + func = _StdcallFuncPtr(name, self) +AttributeError: function ordinal 0 not found +>>> +\end{verbatim} + + +\subsubsection{Calling functions\label{ctypes-calling-functions}} + +You can call these functions like any other Python callable. This +example uses the \code{time()} function, which returns system time in +seconds since the \UNIX{} epoch, and the \code{GetModuleHandleA()} function, +which returns a win32 module handle. 
+ +This example calls both functions with a NULL pointer (\code{None} should +be used as the NULL pointer): +\begin{verbatim} +>>> print libc.time(None) +114... +>>> print hex(windll.kernel32.GetModuleHandleA(None)) # doctest: +WINDOWS +0x1d000000 +>>> +\end{verbatim} + +\code{ctypes} tries to protect you from calling functions with the wrong +number of arguments. Unfortunately this only works on Windows. It +does this by examining the stack after the function returns: +\begin{verbatim} +>>> windll.kernel32.GetModuleHandleA() # doctest: +WINDOWS +Traceback (most recent call last): + File "<stdin>", line 1, in ? +ValueError: Procedure probably called with not enough arguments (4 bytes missing) +>>> windll.kernel32.GetModuleHandleA(0, 0) # doctest: +WINDOWS +Traceback (most recent call last): + File "<stdin>", line 1, in ? +ValueError: Procedure probably called with too many arguments (4 bytes in excess) +>>> +\end{verbatim} + +On Windows, \code{ctypes} uses win32 structured exception handling to +prevent crashes from general protection faults when functions are +called with invalid argument values: +\begin{verbatim} +>>> windll.kernel32.GetModuleHandleA(32) # doctest: +WINDOWS +Traceback (most recent call last): + File "<stdin>", line 1, in ? +WindowsError: exception: access violation reading 0x00000020 +>>> +\end{verbatim} + +There are, however, enough ways to crash Python with \code{ctypes}, so +you should be careful anyway. + +Python integers, strings and unicode strings are the only objects that +can directly be used as parameters in these function calls. + +Before we move on calling functions with other parameter types, we +have to learn more about \code{ctypes} data types. 
+ + +\subsubsection{Simple data types\label{ctypes-simple-data-types}} + +\code{ctypes} defines a number of primitive C compatible data types : +\begin{quote} + +\begin{longtable}[c]{|p{0.19\locallinewidth}|p{0.28\locallinewidth}|p{0.14\locallinewidth}|} +\hline +\textbf{ +ctypes type +} & \textbf{ +C type +} & \textbf{ +Python type +} \\ +\hline +\endhead + +\class{c{\_}char} + & +\code{char} + & +character + \\ +\hline + +\class{c{\_}byte} + & +\code{char} + & +integer + \\ +\hline + +\class{c{\_}ubyte} + & +\code{unsigned char} + & +integer + \\ +\hline + +\class{c{\_}short} + & +\code{short} + & +integer + \\ +\hline + +\class{c{\_}ushort} + & +\code{unsigned short} + & +integer + \\ +\hline + +\class{c{\_}int} + & +\code{int} + & +integer + \\ +\hline + +\class{c{\_}uint} + & +\code{unsigned int} + & +integer + \\ +\hline + +\class{c{\_}long} + & +\code{long} + & +integer + \\ +\hline + +\class{c{\_}ulong} + & +\code{unsigned long} + & +long + \\ +\hline + +\class{c{\_}longlong} + & +\code{{\_}{\_}int64} or +\code{long long} + & +long + \\ +\hline + +\class{c{\_}ulonglong} + & +\code{unsigned {\_}{\_}int64} or +\code{unsigned long long} + & +long + \\ +\hline + +\class{c{\_}float} + & +\code{float} + & +float + \\ +\hline + +\class{c{\_}double} + & +\code{double} + & +float + \\ +\hline + +\class{c{\_}char{\_}p} + & +\code{char *} +(NUL terminated) + & +string or +\code{None} + \\ +\hline + +\class{c{\_}wchar{\_}p} + & +\code{wchar{\_}t *} +(NUL terminated) + & +unicode or +\code{None} + \\ +\hline + +\class{c{\_}void{\_}p} + & +\code{void *} + & +integer or +\code{None} + \\ +\hline +\end{longtable} +\end{quote} + +All these types can be created by calling them with an optional +initializer of the correct type and value: +\begin{verbatim} +>>> c_int() +c_long(0) +>>> c_char_p("Hello, World") +c_char_p('Hello, World') +>>> c_ushort(-3) +c_ushort(65533) +>>> +\end{verbatim} + +Since these types are mutable, their value can also be changed +afterwards: 
+\begin{verbatim} +>>> i = c_int(42) +>>> print i +c_long(42) +>>> print i.value +42 +>>> i.value = -99 +>>> print i.value +-99 +>>> +\end{verbatim} + +Assigning a new value to instances of the pointer types \class{c{\_}char{\_}p}, +\class{c{\_}wchar{\_}p}, and \class{c{\_}void{\_}p} changes the \emph{memory location} they +point to, \emph{not the contents} of the memory block (of course not, +because Python strings are immutable): +\begin{verbatim} +>>> s = "Hello, World" +>>> c_s = c_char_p(s) +>>> print c_s +c_char_p('Hello, World') +>>> c_s.value = "Hi, there" +>>> print c_s +c_char_p('Hi, there') +>>> print s # first string is unchanged +Hello, World +\end{verbatim} + +You should be careful, however, not to pass them to functions +expecting pointers to mutable memory. If you need mutable memory +blocks, ctypes has a \code{create{\_}string{\_}buffer} function which creates +these in various ways. The current memory block contents can be +accessed (or changed) with the \code{raw} property; if you want to access +it as a NUL terminated string, use the \code{value} property: +\begin{verbatim} +>>> from ctypes import * +>>> p = create_string_buffer(3) # create a 3 byte buffer, initialized to NUL bytes +>>> print sizeof(p), repr(p.raw) +3 '\x00\x00\x00' +>>> p = create_string_buffer("Hello") # create a buffer containing a NUL terminated string +>>> print sizeof(p), repr(p.raw) +6 'Hello\x00' +>>> print repr(p.value) +'Hello' +>>> p = create_string_buffer("Hello", 10) # create a 10 byte buffer +>>> print sizeof(p), repr(p.raw) +10 'Hello\x00\x00\x00\x00\x00' +>>> p.value = "Hi" +>>> print sizeof(p), repr(p.raw) +10 'Hi\x00lo\x00\x00\x00\x00\x00' +>>> +\end{verbatim} + +The \code{create{\_}string{\_}buffer} function replaces the \code{c{\_}buffer} +function (which is still available as an alias), as well as the +\code{c{\_}string} function from earlier ctypes releases.
To create a +mutable memory block containing unicode characters of the C type +\code{wchar{\_}t} use the \code{create{\_}unicode{\_}buffer} function. + + +\subsubsection{Calling functions, continued\label{ctypes-calling-functions-continued}} + +Note that printf prints to the real standard output channel, \emph{not} to +\code{sys.stdout}, so these examples will only work at the console +prompt, not from within \emph{IDLE} or \emph{PythonWin}: +\begin{verbatim} +>>> printf = libc.printf +>>> printf("Hello, %s\n", "World!") +Hello, World! +14 +>>> printf("Hello, %S", u"World!") +Hello, World! +13 +>>> printf("%d bottles of beer\n", 42) +42 bottles of beer +19 +>>> printf("%f bottles of beer\n", 42.5) +Traceback (most recent call last): + File "<stdin>", line 1, in ? +ArgumentError: argument 2: exceptions.TypeError: Don't know how to convert parameter 2 +>>> +\end{verbatim} + +As has been mentioned before, all Python types except integers, +strings, and unicode strings have to be wrapped in their corresponding +\code{ctypes} type, so that they can be converted to the required C data +type: +\begin{verbatim} +>>> printf("An int %d, a double %f\n", 1234, c_double(3.14)) +An int 1234, a double 3.140000 +31 +>>> +\end{verbatim} + + +\subsubsection{Calling functions with your own custom data types\label{ctypes-calling-functions-with-own-custom-data-types}} + +You can also customize \code{ctypes} argument conversion to allow +instances of your own classes to be used as function arguments. +\code{ctypes} looks for an \member{{\_}as{\_}parameter{\_}} attribute and uses this as +the function argument. Of course, it must be one of integer, string, +or unicode: +\begin{verbatim} +>>> class Bottles(object): +... def __init__(self, number): +... self._as_parameter_ = number +...
+>>> bottles = Bottles(42) +>>> printf("%d bottles of beer\n", bottles) +42 bottles of beer +19 +>>> +\end{verbatim} + +If you don't want to store the instance's data in the +\member{{\_}as{\_}parameter{\_}} instance variable, you could define a \code{property} +which makes the data available. + + +\subsubsection{Specifying the required argument types (function prototypes)\label{ctypes-specifying-required-argument-types}} + +It is possible to specify the required argument types of functions +exported from DLLs by setting the \member{argtypes} attribute. + +\member{argtypes} must be a sequence of C data types (the \code{printf} +function is probably not a good example here, because it takes a +variable number and different types of parameters depending on the +format string; on the other hand this is quite handy to experiment +with this feature): +\begin{verbatim} +>>> printf.argtypes = [c_char_p, c_char_p, c_int, c_double] +>>> printf("String '%s', Int %d, Double %f\n", "Hi", 10, 2.2) +String 'Hi', Int 10, Double 2.200000 +37 +>>> +\end{verbatim} + +Specifying a format protects against incompatible argument types (just +as a prototype for a C function), and tries to convert the arguments +to valid types: +\begin{verbatim} +>>> printf("%d %d %d", 1, 2, 3) +Traceback (most recent call last): + File "<stdin>", line 1, in ? +ArgumentError: argument 2: exceptions.TypeError: wrong type +>>> printf("%s %d %f", "X", 2, 3) +X 2 3.000000 +12 +>>> +\end{verbatim} + +If you have defined your own classes which you pass to function calls, +you have to implement a \method{from{\_}param} class method for them to be +able to use them in the \member{argtypes} sequence.
The \method{from{\_}param} +class method receives the Python object passed to the function call; +it should do a typecheck or whatever is needed to make sure this +object is acceptable, and then return the object itself, its +\member{{\_}as{\_}parameter{\_}} attribute, or whatever you want to pass as the C +function argument in this case. Again, the result should be an +integer, string, unicode, a \code{ctypes} instance, or something having +the \member{{\_}as{\_}parameter{\_}} attribute. + + +\subsubsection{Return types\label{ctypes-return-types}} + +By default functions are assumed to return integers. Other return +types can be specified by setting the \member{restype} attribute of the +function object. + +Here is a more advanced example; it uses the strchr function, which +expects a string pointer and a char, and returns a pointer to a +string: +\begin{verbatim} +>>> strchr = libc.strchr +>>> strchr("abcdef", ord("d")) # doctest: +SKIP +8059983 +>>> strchr.restype = c_char_p # c_char_p is a pointer to a string +>>> strchr("abcdef", ord("d")) +'def' +>>> print strchr("abcdef", ord("x")) +None +>>> +\end{verbatim} + +If you want to avoid the \code{ord("x")} calls above, you can set the +\member{argtypes} attribute, and the second argument will be converted from +a single character Python string into a C char: +\begin{verbatim} +>>> strchr.restype = c_char_p +>>> strchr.argtypes = [c_char_p, c_char] +>>> strchr("abcdef", "d") +'def' +>>> strchr("abcdef", "def") +Traceback (most recent call last): + File "<stdin>", line 1, in ? +ArgumentError: argument 2: exceptions.TypeError: one character string expected +>>> print strchr("abcdef", "x") +None +>>> strchr("abcdef", "d") +'def' +>>> +\end{verbatim} + +XXX Mention the \member{errcheck} protocol... + +You can also use a callable Python object (a function or a class for +example) as the \member{restype} attribute.
It will be called with the +\code{integer} the C function returns, and the result of this call will +be used as the result of your function call. This is useful to check +for error return values and automatically raise an exception: +\begin{verbatim} +>>> GetModuleHandle = windll.kernel32.GetModuleHandleA # doctest: +WINDOWS +>>> def ValidHandle(value): +... if value == 0: +... raise WinError() +... return value +... +>>> +>>> GetModuleHandle.restype = ValidHandle # doctest: +WINDOWS +>>> GetModuleHandle(None) # doctest: +WINDOWS +486539264 +>>> GetModuleHandle("something silly") # doctest: +WINDOWS +IGNORE_EXCEPTION_DETAIL +Traceback (most recent call last): + File "<stdin>", line 1, in ? + File "<stdin>", line 3, in ValidHandle +WindowsError: [Errno 126] The specified module could not be found. +>>> +\end{verbatim} + +\code{WinError} is a function which will call the Windows \code{FormatMessage()} +api to get the string representation of an error code, and \emph{returns} +an exception. \code{WinError} takes an optional error code parameter; if +none is given, it calls \function{GetLastError()} to retrieve it. + + +\subsubsection{Passing pointers (or: passing parameters by reference)\label{ctypes-passing-pointers}} + +Sometimes a C api function expects a \emph{pointer} to a data type as +parameter, probably to write into the corresponding location, or if +the data is too large to be passed by value. This is also known as +\emph{passing parameters by reference}. + +\code{ctypes} exports the \function{byref} function which is used to pass +parameters by reference.
The same effect can be achieved with the +\code{pointer} function, although \code{pointer} does a lot more work since +it constructs a real pointer object, so it is faster to use \function{byref} +if you don't need the pointer object in Python itself: +\begin{verbatim} +>>> i = c_int() +>>> f = c_float() +>>> s = create_string_buffer('\000' * 32) +>>> print i.value, f.value, repr(s.value) +0 0.0 '' +>>> libc.sscanf("1 3.14 Hello", "%d %f %s", +... byref(i), byref(f), s) +3 +>>> print i.value, f.value, repr(s.value) +1 3.1400001049 'Hello' +>>> +\end{verbatim} + + +\subsubsection{Structures and unions\label{ctypes-structures-unions}} + +Structures and unions must derive from the \class{Structure} and \class{Union} +base classes which are defined in the \code{ctypes} module. Each subclass +must define a \member{{\_}fields{\_}} attribute. \member{{\_}fields{\_}} must be a list of +\emph{2-tuples}, containing a \emph{field name} and a \emph{field type}. + +The field type must be a \code{ctypes} type like \class{c{\_}int}, or any other +derived \code{ctypes} type: structure, union, array, pointer. + +Here is a simple example of a POINT structure, which contains two +integers named \code{x} and \code{y}, and also shows how to initialize a +structure in the constructor: +\begin{verbatim} +>>> from ctypes import * +>>> class POINT(Structure): +... _fields_ = [("x", c_int), +... ("y", c_int)] +... +>>> point = POINT(10, 20) +>>> print point.x, point.y +10 20 +>>> point = POINT(y=5) +>>> print point.x, point.y +0 5 +>>> POINT(1, 2, 3) +Traceback (most recent call last): + File "<stdin>", line 1, in ? +ValueError: too many initializers +>>> +\end{verbatim} + +You can, however, build much more complicated structures. Structures +can themselves contain other structures by using a structure as a field +type. + +Here is a RECT structure which contains two POINTs named \code{upperleft} +and \code{lowerright}: +\begin{verbatim} +>>> class RECT(Structure): +...
_fields_ = [("upperleft", POINT), +... ("lowerright", POINT)] +... +>>> rc = RECT(point) +>>> print rc.upperleft.x, rc.upperleft.y +0 5 +>>> print rc.lowerright.x, rc.lowerright.y +0 0 +>>> +\end{verbatim} + +Nested structures can also be initialized in the constructor in +several ways: +\begin{verbatim} +>>> r = RECT(POINT(1, 2), POINT(3, 4)) +>>> r = RECT((1, 2), (3, 4)) +\end{verbatim} + +Field descriptors can be retrieved from the \emph{class}; they are useful +for debugging because they can provide useful information: +\begin{verbatim} +>>> print POINT.x +<Field type=c_long, ofs=0, size=4> +>>> print POINT.y +<Field type=c_long, ofs=4, size=4> +>>> +\end{verbatim} + + +\subsubsection{Structure/union alignment and byte order\label{ctypes-structureunion-alignment-byte-order}} + +By default, Structure and Union fields are aligned in the same way the +C compiler does it. It is possible to override this behaviour by +specifying a \member{{\_}pack{\_}} class attribute in the subclass +definition. This must be set to a positive integer and specifies the +maximum alignment for the fields. This is what \code{{\#}pragma pack(n)} +also does in MSVC. + +\code{ctypes} uses the native byte order for Structures and Unions. To +build structures with non-native byte order, you can use one of the +BigEndianStructure, LittleEndianStructure, BigEndianUnion, and +LittleEndianUnion base classes. These classes cannot contain pointer +fields. + + +\subsubsection{Bit fields in structures and unions\label{ctypes-bit-fields-in-structures-unions}} + +It is possible to create structures and unions containing bit fields. +Bit fields are only possible for integer fields, the bit width is +specified as the third item in the \member{{\_}fields{\_}} tuples: +\begin{verbatim} +>>> class Int(Structure): +... _fields_ = [("first_16", c_int, 16), +... ("second_16", c_int, 16)] +...
+>>> print Int.first_16 +<Field type=c_long, ofs=0:0, bits=16> +>>> print Int.second_16 +<Field type=c_long, ofs=0:16, bits=16> +>>> +\end{verbatim} + + +\subsubsection{Arrays\label{ctypes-arrays}} + +Arrays are sequences, containing a fixed number of instances of the +same type. + +The recommended way to create array types is by multiplying a data +type with a positive integer: +\begin{verbatim} +TenPointsArrayType = POINT * 10 +\end{verbatim} + +Here is an example of a somewhat artificial data type, a structure +containing 4 POINTs among other stuff: +\begin{verbatim} +>>> from ctypes import * +>>> class POINT(Structure): +... _fields_ = ("x", c_int), ("y", c_int) +... +>>> class MyStruct(Structure): +... _fields_ = [("a", c_int), +... ("b", c_float), +... ("point_array", POINT * 4)] +>>> +>>> print len(MyStruct().point_array) +4 +\end{verbatim} + +Instances are created in the usual way, by calling the class: +\begin{verbatim} +arr = TenPointsArrayType() +for pt in arr: + print pt.x, pt.y +\end{verbatim} + +The above code prints a series of \code{0 0} lines, because the array +contents are initialized to zeros. + +Initializers of the correct type can also be specified: +\begin{verbatim} +>>> from ctypes import * +>>> TenIntegers = c_int * 10 +>>> ii = TenIntegers(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) +>>> print ii +<c_long_Array_10 object at 0x...> +>>> for i in ii: print i, +...
+1 2 3 4 5 6 7 8 9 10 +>>> +\end{verbatim} + + +\subsubsection{Pointers\label{ctypes-pointers}} + +Pointer instances are created by calling the \code{pointer} function on a +\code{ctypes} type: +\begin{verbatim} +>>> from ctypes import * +>>> i = c_int(42) +>>> pi = pointer(i) +>>> +\end{verbatim} + +XXX XXX Not correct: use indexing, not the contents atribute + +Pointer instances have a \code{contents} attribute which returns the +ctypes' type pointed to, the \code{c{\_}int(42)} in the above case: +\begin{verbatim} +>>> pi.contents +c_long(42) +>>> +\end{verbatim} + +Assigning another \class{c{\_}int} instance to the pointer's contents +attribute would cause the pointer to point to the memory location +where this is stored: +\begin{verbatim} +>>> pi.contents = c_int(99) +>>> pi.contents +c_long(99) +>>> +\end{verbatim} + +Pointer instances can also be indexed with integers: +\begin{verbatim} +>>> pi[0] +99 +>>> +\end{verbatim} + +XXX What is this??? +Assigning to an integer index changes the pointed to value: +\begin{verbatim} +>>> i2 = pi[0] +>>> i2 +99 +>>> pi[0] = 22 +>>> i2 +99 +>>> +\end{verbatim} + +It is also possible to use indexes different from 0, but you must know +what you're doing when you use this: You access or change arbitrary +memory locations when you do this. Generally you only use this feature +if you receive a pointer from a C function, and you \emph{know} that the +pointer actually points to an array instead of a single item. + + +\subsubsection{Pointer classes/types\label{ctypes-pointer-classestypes}} + +Behind the scenes, the \code{pointer} function does more than simply +create pointer instances, it has to create pointer \emph{types} first. +This is done with the \code{POINTER} function, which accepts any +\code{ctypes} type, and returns a new type: +\begin{verbatim} +>>> PI = POINTER(c_int) +>>> PI +<class 'ctypes.LP_c_long'> +>>> PI(42) # doctest: +IGNORE_EXCEPTION_DETAIL +Traceback (most recent call last): + File "<stdin>", line 1, in ? 
+TypeError: expected c_long instead of int +>>> PI(c_int(42)) +<ctypes.LP_c_long object at 0x...> +>>> +\end{verbatim} + + +\subsubsection{Incomplete Types\label{ctypes-incomplete-types}} + +\emph{Incomplete Types} are structures, unions or arrays whose members are +not yet specified. In C, they are specified by forward declarations, which +are defined later: +\begin{verbatim} +struct cell; /* forward declaration */ + +struct cell { + char *name; + struct cell *next; +}; +\end{verbatim} + +The straightforward translation into ctypes code would be this, but it +does not work: +\begin{verbatim} +>>> class cell(Structure): +... _fields_ = [("name", c_char_p), +... ("next", POINTER(cell))] +... +Traceback (most recent call last): + File "<stdin>", line 1, in ? + File "<stdin>", line 2, in cell +NameError: name 'cell' is not defined +>>> +\end{verbatim} + +because the new \code{class cell} is not available in the class statement +itself. In \code{ctypes}, we can define the \code{cell} class and set the +\member{{\_}fields{\_}} attribute later, after the class statement: +\begin{verbatim} +>>> from ctypes import * +>>> class cell(Structure): +... pass +... +>>> cell._fields_ = [("name", c_char_p), +... ("next", POINTER(cell))] +>>> +\end{verbatim} + +Let's try it. We create two instances of \code{cell}, and let them point +to each other, and finally follow the pointer chain a few times: +\begin{verbatim} +>>> c1 = cell() +>>> c1.name = "foo" +>>> c2 = cell() +>>> c2.name = "bar" +>>> c1.next = pointer(c2) +>>> c2.next = pointer(c1) +>>> p = c1 +>>> for i in range(8): +... print p.name, +... p = p.next[0] +... +foo bar foo bar foo bar foo bar +>>> +\end{verbatim} + + +\subsubsection{Callback functions\label{ctypes-callback-functions}} + +\code{ctypes} allows creating C callable function pointers from Python +callables. These are sometimes called \emph{callback functions}.
+ +First, you must create a class for the callback function; the class +knows the calling convention, the return type, and the number and +types of arguments this function will receive. + +The CFUNCTYPE factory function creates types for callback functions +using the normal cdecl calling convention, and, on Windows, the +WINFUNCTYPE factory function creates types for callback functions +using the stdcall calling convention. + +Both of these factory functions are called with the result type as +first argument, and the callback function's expected argument types as +the remaining arguments. + +I will present an example here which uses the standard C library's +\function{qsort} function, which is used to sort items with the help of a +callback function. \function{qsort} will be used to sort an array of +integers: +\begin{verbatim} +>>> IntArray5 = c_int * 5 +>>> ia = IntArray5(5, 1, 7, 33, 99) +>>> qsort = libc.qsort +>>> qsort.restype = None +>>> +\end{verbatim} + +\function{qsort} must be called with a pointer to the data to sort, the +number of items in the data array, the size of one item, and a pointer +to the comparison function, the callback. The callback will then be +called with two pointers to items, and it must return a negative +integer if the first item is smaller than the second, a zero if they +are equal, and a positive integer otherwise. + +So our callback function receives pointers to integers, and must +return an integer. First we create the \code{type} for the callback +function: +\begin{verbatim} +>>> CMPFUNC = CFUNCTYPE(c_int, POINTER(c_int), POINTER(c_int)) +>>> +\end{verbatim} + +For the first implementation of the callback function, we simply print +the arguments we get, and return 0 (incremental development ;-): +\begin{verbatim} +>>> def py_cmp_func(a, b): +... print "py_cmp_func", a, b +... return 0 +...
+>>> +\end{verbatim} + +Create the C callable callback: +\begin{verbatim} +>>> cmp_func = CMPFUNC(py_cmp_func) +>>> +\end{verbatim} + +And we're ready to go: +\begin{verbatim} +>>> qsort(ia, len(ia), sizeof(c_int), cmp_func) # doctest: +WINDOWS +py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...> +py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...> +py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...> +py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...> +py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...> +py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...> +py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...> +py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...> +py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...> +py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...> +>>> +\end{verbatim} + +We know how to access the contents of a pointer, so lets redefine our callback: +\begin{verbatim} +>>> def py_cmp_func(a, b): +... print "py_cmp_func", a[0], b[0] +... return 0 +... 
+>>> cmp_func = CMPFUNC(py_cmp_func) +>>> +\end{verbatim} + +Here is what we get on Windows: +\begin{verbatim} +>>> qsort(ia, len(ia), sizeof(c_int), cmp_func) # doctest: +WINDOWS +py_cmp_func 7 1 +py_cmp_func 33 1 +py_cmp_func 99 1 +py_cmp_func 5 1 +py_cmp_func 7 5 +py_cmp_func 33 5 +py_cmp_func 99 5 +py_cmp_func 7 99 +py_cmp_func 33 99 +py_cmp_func 7 33 +>>> +\end{verbatim} + +It is funny to see that on Linux the sort function seems to work much +more efficiently; it is doing fewer comparisons: +\begin{verbatim} +>>> qsort(ia, len(ia), sizeof(c_int), cmp_func) # doctest: +LINUX +py_cmp_func 5 1 +py_cmp_func 33 99 +py_cmp_func 7 33 +py_cmp_func 5 7 +py_cmp_func 1 7 +>>> +\end{verbatim} + +Ah, we're nearly done! The last step is to actually compare the two +items and return a useful result: +\begin{verbatim} +>>> def py_cmp_func(a, b): +... print "py_cmp_func", a[0], b[0] +... return a[0] - b[0] +... +>>> +\end{verbatim} + +Final run on Windows: +\begin{verbatim} +>>> qsort(ia, len(ia), sizeof(c_int), CMPFUNC(py_cmp_func)) # doctest: +WINDOWS +py_cmp_func 33 7 +py_cmp_func 99 33 +py_cmp_func 5 99 +py_cmp_func 1 99 +py_cmp_func 33 7 +py_cmp_func 1 33 +py_cmp_func 5 33 +py_cmp_func 5 7 +py_cmp_func 1 7 +py_cmp_func 5 1 +>>> +\end{verbatim} + +and on Linux: +\begin{verbatim} +>>> qsort(ia, len(ia), sizeof(c_int), CMPFUNC(py_cmp_func)) # doctest: +LINUX +py_cmp_func 5 1 +py_cmp_func 33 99 +py_cmp_func 7 33 +py_cmp_func 1 7 +py_cmp_func 5 7 +>>> +\end{verbatim} + +So, our array is sorted now: +\begin{verbatim} +>>> for i in ia: print i, +... +1 5 7 33 99 +>>> +\end{verbatim} + +\textbf{Important note for callback functions:} + +Make sure you keep references to CFUNCTYPE objects as long as they are +used from C code. ctypes doesn't, and if you don't, they may be +garbage collected, crashing your program when a callback is made.
+ + +\subsubsection{Accessing values exported from dlls\label{ctypes-accessing-values-exported-from-dlls}} + +Sometimes, a dll not only exports functions, it also exports +values. An example in the Python library itself is the +\code{Py{\_}OptimizeFlag}, an integer set to 0, 1, or 2, depending on the +\programopt{-O} or \programopt{-OO} flag given on startup. + +\code{ctypes} can access values like this with the \method{in{\_}dll} class +method of the type. \var{pythonapi} is a predefined symbol giving +access to the Python C api: +\begin{verbatim} +>>> opt_flag = c_int.in_dll(pythonapi, "Py_OptimizeFlag") +>>> print opt_flag +c_long(0) +>>> +\end{verbatim} + +If the interpreter had been started with \programopt{-O}, the sample +would have printed \code{c{\_}long(1)}, or \code{c{\_}long(2)} if \programopt{-OO} had +been specified. + +An extended example which also demonstrates the use of pointers +accesses the \code{PyImport{\_}FrozenModules} pointer exported by Python. + +Quoting the Python docs: \emph{This pointer is initialized to point to an +array of \code{struct {\_}frozen} records, terminated by one whose members +are all NULL or zero. When a frozen module is imported, it is searched +in this table. Third-party code could play tricks with this to provide +a dynamically created collection of frozen modules.} + +So manipulating this pointer could even prove useful. To restrict the +example size, we show only how this table can be read with +\code{ctypes}: +\begin{verbatim} +>>> from ctypes import * +>>> +>>> class struct_frozen(Structure): +... _fields_ = [("name", c_char_p), +... ("code", POINTER(c_ubyte)), +... ("size", c_int)] +...
+>>> +\end{verbatim} + +We have defined the \code{struct {\_}frozen} data type, so we can get the +pointer to the table: +\begin{verbatim} +>>> FrozenTable = POINTER(struct_frozen) +>>> table = FrozenTable.in_dll(pythonapi, "PyImport_FrozenModules") +>>> +\end{verbatim} + +Since \code{table} is a \code{pointer} to the array of \code{struct{\_}frozen} +records, we can iterate over it, but we just have to make sure that +our loop terminates, because pointers have no size. Sooner or later it +would probably crash with an access violation or whatever, so it's +better to break out of the loop when we hit the NULL entry: +\begin{verbatim} +>>> for item in table: +... print item.name, item.size +... if item.name is None: +... break +... +__hello__ 104 +__phello__ -104 +__phello__.spam 104 +None 0 +>>> +\end{verbatim} + +The fact that standard Python has a frozen module and a frozen package +(indicated by the negative size member) is not well known; it is only +used for testing. Try it out with \code{import {\_}{\_}hello{\_}{\_}} for example. + +XXX Describe how to access the \var{code} member fields, which contain +the byte code for the modules. + + +\subsubsection{Surprises\label{ctypes-surprises}} + +There are some edges in \code{ctypes} where you may expect something +other than what actually happens. + +Consider the following example: +\begin{verbatim} +>>> from ctypes import * +>>> class POINT(Structure): +... _fields_ = ("x", c_int), ("y", c_int) +... +>>> class RECT(Structure): +... _fields_ = ("a", POINT), ("b", POINT) +... +>>> p1 = POINT(1, 2) +>>> p2 = POINT(3, 4) +>>> rc = RECT(p1, p2) +>>> print rc.a.x, rc.a.y, rc.b.x, rc.b.y +1 2 3 4 +>>> # now swap the two points +>>> rc.a, rc.b = rc.b, rc.a +>>> print rc.a.x, rc.a.y, rc.b.x, rc.b.y +3 4 3 4 +\end{verbatim} + +Hm. We certainly expected the last statement to print \code{3 4 1 2}. +What happened?
Here are the steps of the \code{rc.a, rc.b = rc.b, rc.a} +line above: +\begin{verbatim} +>>> temp0, temp1 = rc.b, rc.a +>>> rc.a = temp0 +>>> rc.b = temp1 +\end{verbatim} + +Note that \code{temp0} and \code{temp1} are objects still using the internal +buffer of the \code{rc} object above. So executing \code{rc.a = temp0} +copies the buffer contents of \code{temp0} into \code{rc}'s buffer. This, +in turn, changes the contents of \code{temp1}. So, the last assignment, +\code{rc.b = temp1}, doesn't have the expected effect. + +Keep in mind that retrieving subobjects from Structures, Unions, and +Arrays doesn't \emph{copy} the subobject; instead it retrieves a wrapper +object accessing the root-object's underlying buffer. + +Another example that may behave differently from what one would expect is this: +\begin{verbatim} +>>> s = c_char_p() +>>> s.value = "abc def ghi" +>>> s.value +'abc def ghi' +>>> s.value is s.value +False +>>> +\end{verbatim} + +Why is it printing \code{False}? ctypes instances are objects containing +a memory block plus some descriptors accessing the contents of the +memory. Storing a Python object in the memory block does not store +the object itself; instead the \code{contents} of the object is stored. +Accessing the contents again constructs a new Python object each time! + + +\subsubsection{Bugs, ToDo and non-implemented things\label{ctypes-bugs-todo-non-implemented-things}} + +Enumeration types are not implemented. You can do it easily yourself, +using \class{c{\_}int} as the base class. + +\code{long double} is not implemented.
+% Local Variables: +% compile-command: "make.bat" +% End: + diff --git a/Doc/lib/libctypesref.tex b/Doc/lib/libctypesref.tex new file mode 100644 index 0000000..6d950f4 --- /dev/null +++ b/Doc/lib/libctypesref.tex @@ -0,0 +1,457 @@ +\subsection{ctypes reference\label{ctypes-reference}} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% functions +\subsubsection{ctypes functions} + +\begin{funcdesc}{addressof}{obj} +Returns the address of the memory buffer as integer. \var{obj} must +be an instance of a ctypes type. +\end{funcdesc} + +\begin{funcdesc}{alignment}{obj_or_type} +Returns the alignment requirements of a ctypes type. +\var{obj_or_type} must be a ctypes type or an instance. +\end{funcdesc} + +\begin{excclassdesc}{ArgumentError}{} +This exception is raised when a foreign function call cannot convert +one of the passed arguments. +\end{excclassdesc} + +\begin{funcdesc}{byref}{obj} +Returns a light-weight pointer to \var{obj}, which must be an instance +of a ctypes type. The returned object can only be used as a foreign +function call parameter. It behaves similar to \code{pointer(obj)}, +but the construction is a lot faster. +\end{funcdesc} + +\begin{funcdesc}{cast}{obj, type} +This function is similar to the cast operator in C. It returns a new +instance of \var{type} which points to the same memory block as +\code{obj}. \code{type} must be a pointer type, and \code{obj} + must be an object that can be interpreted as a pointer. +\end{funcdesc} + +% XXX separate section for CFUNCTYPE, WINFUNCTYPE, PYFUNCTYPE? + +\begin{funcdesc}{CFUNCTYPE}{restype, *argtypes} +This is a factory function that returns a function prototype. The +function prototype describes a function that has a result type of +\code{restype}, and accepts arguments as specified by \code{argtypes}. +The function prototype can be used to construct several kinds of +functions, depending on how the prototype is called. 
+ +The prototypes returned by \code{CFUNCTYPE} or \code{PYFUNCTYPE} +create functions that use the standard C calling convention, +prototypes returned from \code{WINFUNCTYPE} (on Windows) use the +\code{__stdcall} calling convention. + +Functions created by calling the \code{CFUNCTYPE} and +\code{WINFUNCTYPE} prototypes release the Python GIL +before entering the foreign function, and acquire it back after +leaving the function code. + +% XXX differences between CFUNCTYPE / WINFUNCTYPE / PYFUNCTYPE + +\end{funcdesc} + +\begin{funcdesc}{create_string_buffer}{init_or_size\optional{, size}} +This function creates a mutable character buffer. The returned object +is a ctypes array of \code{c_char}. + +\var{init_or_size} must be an integer which specifies the size of the +array, or a string which will be used to initialize the array items. + +If a string is specified as first argument, the buffer is made one +item larger than the length of the string so that the last element in +the array is a NUL termination character. An integer can be passed as +second argument which allows to specify the size of the array if the +length of the string should not be used. + +If the first parameter is a unicode string, it is converted into an +8-bit string according to ctypes conversion rules. +\end{funcdesc} + +\begin{funcdesc}{create_unicode_buffer}{init_or_size\optional{, size}} +This function creates a mutable unicode character buffer. The +returned object is a ctypes array of \code{c_wchar}. + +\var{init_or_size} must be an integer which specifies the size of the +array, or a unicode string which will be used to initialize the array +items. + +If a unicode string is specified as first argument, the buffer is made +one item larger than the length of the string so that the last element +in the array is a NUL termination character. An integer can be passed +as second argument which allows to specify the size of the array if +the length of the string should not be used. 
+ +If the first parameter is an 8-bit string, it is converted into a +unicode string according to ctypes conversion rules. +\end{funcdesc} + +\begin{funcdesc}{DllCanUnloadNow}{} +Windows only: This function is a hook which allows implementing +in-process COM servers with ctypes. It is called from the +\code{DllCanUnloadNow} function that the \code{_ctypes} +extension dll exports. +\end{funcdesc} + +\begin{funcdesc}{DllGetClassObject}{} +Windows only: This function is a hook which allows implementing +in-process COM servers with ctypes. It is called from the +\code{DllGetClassObject} function that the \code{_ctypes} +extension dll exports. +\end{funcdesc} + +\begin{funcdesc}{FormatError}{\optional{code}} +Windows only: Returns a textual description of the error code. If no +error code is specified, the last error code is used by calling the +Windows API function \code{GetLastError}. +\end{funcdesc} + +\begin{funcdesc}{GetLastError}{} +Windows only: Returns the last error code set by Windows in the +calling thread. +\end{funcdesc} + +\begin{funcdesc}{memmove}{dst, src, count} +Same as the standard C \code{memmove} library function: copies +\var{count} bytes from \code{src} to \code{dst}. \code{dst} and +\code{src} must be integers or ctypes instances that can be converted to pointers. +\end{funcdesc} + +\begin{funcdesc}{memset}{dst, c, count} +Same as the standard C \code{memset} library function: fills the +memory block at address \code{dst} with \var{count} bytes of value +\var{c}. \var{dst} must be an integer specifying an address, or a ctypes instance. +\end{funcdesc} + +\begin{funcdesc}{POINTER}{type} +This factory function creates and returns a new ctypes pointer type. +Pointer types are cached and reused internally, so calling this +function repeatedly is cheap. \var{type} must be a ctypes type. +\end{funcdesc} + +\begin{funcdesc}{pointer}{obj} +This function creates a new pointer instance, pointing to \var{obj}. 
+The returned object is of the type \code{POINTER(type(obj))}. + +Note: If you just want to pass a pointer to an object to a foreign +function call, you should use \code{byref(obj)} which is much faster. +\end{funcdesc} + +\begin{funcdesc}{PYFUNCTYPE}{restype, *argtypes} +\end{funcdesc} + +\begin{funcdesc}{pythonapi}{} +\end{funcdesc} + +\begin{funcdesc}{resize}{obj, size} +This function resizes the internal memory buffer of \var{obj}, which +must be an instance of a ctypes type. It is not possible to make the +buffer smaller than the native size of the object's type, as given by +\code{sizeof(type(obj))}, but it is possible to enlarge the buffer. +\end{funcdesc} + +\begin{funcdesc}{set_conversion_mode}{encoding, errors} +This function sets the rules that ctypes objects use when converting +between 8-bit strings and unicode strings. \var{encoding} must be a +string specifying an encoding, like 'utf-8' or 'mbcs', \var{errors} +must be a string specifying the error handling on encoding/decoding +errors. Examples of possible values are ``strict'', ``replace'', or +``ignore''. + +\code{set_conversion_mode} returns a 2-tuple containing the previous +conversion rules. On Windows, the initial conversion rules are +\code{('mbcs', 'ignore')}, on other systems \code{('ascii', 'strict')}. +\end{funcdesc} + +\begin{funcdesc}{sizeof}{obj_or_type} +Returns the size in bytes of a ctypes type or instance memory buffer. +Does the same as the C \code{sizeof} operator. +\end{funcdesc} + +\begin{funcdesc}{string_at}{address\optional{, size}} +This function returns the string starting at memory address +\var{address}. If \var{size} is specified, it is used as size, +otherwise the string is assumed to be zero-terminated. +\end{funcdesc} + +\begin{funcdesc}{WinError}{code=None, descr=None} +Windows only: this function is probably the worst-named thing in +ctypes. It creates an instance of \code{WindowsError}. 
If \var{code} +is not specified, \code{GetLastError} is called to determine the error +code. If \var{descr} is not specified, \code{FormatError} is called to +get a textual description of the error. +\end{funcdesc} + +\begin{funcdesc}{WINFUNCTYPE}{restype, *argtypes} +\end{funcdesc} + +\begin{funcdesc}{wstring_at}{address\optional{, size}} +This function returns the wide character string starting at memory +address \var{address} as a unicode string. If \var{size} is specified, +it is used as size, otherwise the string is assumed to be +zero-terminated. +\end{funcdesc} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% data types +\subsubsection{data types} + +ctypes defines a lot of C compatible datatypes, and also allows you to +define your own types. Among other things, a ctypes type instance +holds a memory block that contains C compatible data. + +\begin{classdesc}{_ctypes._CData}{} +This non-public class is the base class of all ctypes data types. It +is mentioned here because it contains the common methods of the ctypes +data types. +\end{classdesc} + +Common methods of ctypes data types, these are all class methods (to +be exact, they are methods of the metaclass): + +\begin{methoddesc}{from_address}{address} +This method returns a ctypes type instance using the memory specified +by \code{address}. +\end{methoddesc} + +\begin{methoddesc}{from_param}{obj} +This method adapts \code{obj} to a ctypes type. +\end{methoddesc} + +\begin{methoddesc}{in_dll}{name, library} +This method returns a ctypes type instance exported by a shared +library. \var{name} is the name of the symbol that exports the data, +\var{library} is the loaded shared library. +\end{methoddesc} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% simple data types +\subsubsection{simple data types} + +\begin{classdesc}{_ctypes._SimpleCData}{} +This non-public class is the base class of all simple ctypes data types. 
It +is mentioned here because it contains the common attributes of the +ctypes data types. +\end{classdesc} + +\begin{memberdesc}{value} +This attribute contains the actual value of the instance. For integer +types, it is an integer. +\end{memberdesc} + +Here are the simple ctypes data types: + +\begin{classdesc}{c_byte}{\optional{value}} +Represents a C \code{signed char} datatype, and interprets the value +as a small integer. The constructor accepts an optional integer +initializer; no overflow checking is done. +\end{classdesc} + +\begin{classdesc}{c_char}{\optional{value}} +Represents a C \code{char} datatype, and interprets the value as a +single character. The constructor accepts an optional string +initializer, the length of the string must be exactly one character. +\end{classdesc} + +\begin{classdesc}{c_char_p}{\optional{value}} +Represents a C \code{char *} datatype, which must be a pointer to a +zero-terminated string. The constructor accepts an integer address, +or a string. +% XXX Explain the difference to POINTER(c_char) +\end{classdesc} + +\begin{classdesc}{c_double}{\optional{value}} +Represents a C \code{double} datatype. The constructor accepts an +optional float initializer. +\end{classdesc} + +\begin{classdesc}{c_float}{\optional{value}} +Represents a C \code{float} datatype. The constructor accepts an +optional float initializer. +\end{classdesc} + +\begin{classdesc}{c_int}{\optional{value}} +Represents a C \code{signed int} datatype. The constructor accepts an +optional integer initializer; no overflow checking is done. On +platforms where \code{sizeof(int) == sizeof(long)} \code{c_int} is an +alias to \code{c_long}. +\end{classdesc} + +\begin{classdesc}{c_int16}{\optional{value}} +Represents a C 16-bit \code{signed int} datatype. Usually an alias +for \code{c_short}. +\end{classdesc} + +\begin{classdesc}{c_int32}{\optional{value}} +Represents a C 32-bit \code{signed int} datatype. Usually an alias +for \code{c_int}. 
+\end{classdesc} + +\begin{classdesc}{c_int64}{\optional{value}} +Represents a C 64-bit \code{signed int} datatype. Usually an alias +for \code{c_longlong}. +\end{classdesc} + +\begin{classdesc}{c_int8}{\optional{value}} +Represents a C 8-bit \code{signed int} datatype. Usually an alias for \code{c_byte}. +\end{classdesc} + +\begin{classdesc}{c_long}{\optional{value}} +Represents a C \code{signed long} datatype. The constructor accepts +an optional integer initializer; no overflow checking is done. +\end{classdesc} + +\begin{classdesc}{c_longlong}{\optional{value}} +Represents a C \code{signed long long} datatype. The constructor +accepts an optional integer initializer; no overflow checking is done. +\end{classdesc} + +\begin{classdesc}{c_short}{\optional{value}} +Represents a C \code{signed short} datatype. The constructor accepts +an optional integer initializer; no overflow checking is done. +\end{classdesc} + +\begin{classdesc}{c_size_t}{\optional{value}} +Represents a C \code{size_t} datatype. +\end{classdesc} + +\begin{classdesc}{c_ubyte}{\optional{value}} +Represents a C \code{unsigned char} datatype, and interprets the value +as a small integer. The constructor accepts an optional integer +initializer; no overflow checking is done. +\end{classdesc} + +\begin{classdesc}{c_uint}{\optional{value}} +Represents a C \code{unsigned int} datatype. The constructor accepts +an optional integer initializer; no overflow checking is done. On +platforms where \code{sizeof(int) == sizeof(long)} \code{c_uint} is an +alias to \code{c_ulong}. +\end{classdesc} + +\begin{classdesc}{c_uint16}{\optional{value}} +Represents a C 16-bit \code{unsigned int} datatype. Usually an alias +for \code{c_ushort}. +\end{classdesc} + +\begin{classdesc}{c_uint32}{\optional{value}} +Represents a C 32-bit \code{unsigned int} datatype. Usually an alias +for \code{c_uint}. +\end{classdesc} + +\begin{classdesc}{c_uint64}{\optional{value}} +Represents a C 64-bit \code{unsigned int} datatype. 
Usually an alias +for \code{c_ulonglong}. +\end{classdesc} + +\begin{classdesc}{c_uint8}{\optional{value}} +Represents a C 8-bit \code{unsigned int} datatype. Usually an alias +for \code{c_ubyte}. +\end{classdesc} + +\begin{classdesc}{c_ulong}{\optional{value}} +Represents a C \code{unsigned long} datatype. The constructor accepts +an optional integer initializer; no overflow checking is done. +\end{classdesc} + +\begin{classdesc}{c_ulonglong}{\optional{value}} +Represents a C \code{unsigned long long} datatype. The constructor +accepts an optional integer initializer; no overflow checking is done. +\end{classdesc} + +\begin{classdesc}{c_ushort}{\optional{value}} +Represents a C \code{unsigned short} datatype. The constructor accepts +an optional integer initializer; no overflow checking is done. +\end{classdesc} + +\begin{classdesc}{c_void_p}{\optional{value}} +Represents a C \code{void *} type. The value is represented as +integer. The constructor accepts an optional integer initializer. +\end{classdesc} + +\begin{classdesc}{c_wchar}{\optional{value}} +Represents a C \code{wchar_t} datatype, and interprets the value as a +single character unicode string. The constructor accepts an optional +string initializer, the length of the string must be exactly one +character. +\end{classdesc} + +\begin{classdesc}{c_wchar_p}{\optional{value}} +Represents a C \code{wchar_t *} datatype, which must be a pointer to a +zero-terminated wide character string. The constructor accepts an +integer address, or a string. +% XXX Explain the difference to POINTER(c_wchar) +\end{classdesc} + +\begin{classdesc}{HRESULT}{} +Windows only: Represents a \code{HRESULT} value, which contains +success or error information for a function or method call. 
+\end{classdesc} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% structured data types +\subsubsection{structured data types} + +\begin{classdesc}{BigEndianStructure}{} +\end{classdesc} + +\begin{classdesc}{LittleEndianStructure}{} +\end{classdesc} + +\begin{classdesc}{Structure}{} +Base class for Structure data types. + +\end{classdesc} + +\begin{classdesc}{Union}{} +\end{classdesc} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% libraries +\subsubsection{libraries} + +\begin{classdesc}{CDLL}{name, mode=RTLD_LOCAL, handle=None} +\end{classdesc} + +\begin{datadesc}{cdll} +\end{datadesc} + +\begin{classdesc}{LibraryLoader}{dlltype} + +\begin{methoddesc}{LoadLibrary}{name, mode=RTLD_LOCAL, handle=None} +\end{methoddesc} + +\end{classdesc} + +\begin{classdesc}{OleDLL}{name, mode=RTLD_LOCAL, handle=None} +\end{classdesc} + +\begin{datadesc}{oledll} +\end{datadesc} + +\begin{classdesc}{py_object}{} +\end{classdesc} + +\begin{classdesc}{PyDLL}{name, mode=RTLD_LOCAL, handle=None} +\end{classdesc} + +\begin{datadesc}{pydll} +\end{datadesc} + +\begin{datadesc}{RTLD_GLOBAL} +\end{datadesc} + +\begin{datadesc}{RTLD_LOCAL} +\end{datadesc} + +\begin{classdesc}{WinDLL}{name, mode=RTLD_LOCAL, handle=None} +\end{classdesc} + +\begin{datadesc}{windll} +\end{datadesc} + diff --git a/Doc/lib/libcursespanel.tex b/Doc/lib/libcursespanel.tex index 519091e..1f96717 100644 --- a/Doc/lib/libcursespanel.tex +++ b/Doc/lib/libcursespanel.tex @@ -22,6 +22,9 @@ Returns the bottom panel in the panel stack. \begin{funcdesc}{new_panel}{win} Returns a panel object, associating it with the given window \var{win}. +Be aware that you need to keep the returned panel object referenced +explicitly. If you don't, the panel object is garbage collected and +removed from the panel stack. 
\end{funcdesc} \begin{funcdesc}{top_panel}{} diff --git a/Doc/lib/libdecimal.tex b/Doc/lib/libdecimal.tex index ffc3363..a0c7bde 100644 --- a/Doc/lib/libdecimal.tex +++ b/Doc/lib/libdecimal.tex @@ -713,8 +713,8 @@ here. \constant{NaN}. \end{methoddesc} -\begin{methoddesc}{sqrt}{} - Return the square root to full precision. +\begin{methoddesc}{sqrt}{x} + Return the square root of \var{x} to full precision. \end{methoddesc} \begin{methoddesc}{subtract}{x, y} @@ -734,7 +734,7 @@ here. or \constant{Rounded}. \end{methoddesc} -\begin{methoddesc}{to_sci_string}{} +\begin{methoddesc}{to_sci_string}{x} Converts a number to a string using scientific notation. \end{methoddesc} diff --git a/Doc/lib/libdis.tex b/Doc/lib/libdis.tex index 19fda5b..27b8a5c 100644 --- a/Doc/lib/libdis.tex +++ b/Doc/lib/libdis.tex @@ -55,7 +55,7 @@ was provided. The output is divided in the following columns: \begin{enumerate} \item the line number, for the first instruction of each line \item the current instruction, indicated as \samp{-->}, -\item a labelled instruction, indicated with \samp{>\code{>}}, +\item a labelled instruction, indicated with \samp{>>}, \item the address of the instruction, \item the operation code name, \item operation parameters, and diff --git a/Doc/lib/libdoctest.tex b/Doc/lib/libdoctest.tex index 0e3a017..73b29ad 100644 --- a/Doc/lib/libdoctest.tex +++ b/Doc/lib/libdoctest.tex @@ -333,8 +333,8 @@ NO!!! \end{verbatim} Any expected output must immediately follow the final -\code{'>\code{>}>~'} or \code{'...~'} line containing the code, and -the expected output (if any) extends to the next \code{'>\code{>}>~'} +\code{'>>>~'} or \code{'...~'} line containing the code, and +the expected output (if any) extends to the next \code{'>>>~'} or all-whitespace line. 
The fine print: @@ -386,7 +386,7 @@ Backslashes in a raw docstring: m\n \end{verbatim} and as many leading whitespace characters are stripped from the -expected output as appeared in the initial \code{'>\code{>}>~'} line +expected output as appeared in the initial \code{'>>>~'} line that started the example. \end{itemize} @@ -407,10 +407,13 @@ You can force use of your own dict as the execution context by passing \subsubsection{What About Exceptions?\label{doctest-exceptions}} No problem, provided that the traceback is the only output produced by -the example: just paste in the traceback. Since tracebacks contain -details that are likely to change rapidly (for example, exact file paths -and line numbers), this is one case where doctest works hard to be -flexible in what it accepts. +the example: just paste in the traceback.\footnote{Examples containing + both expected output and an exception are not supported. Trying + to guess where one ends and the other begins is too error-prone, + and that also makes for a confusing test.} +Since tracebacks contain details that are likely to change rapidly (for +example, exact file paths and line numbers), this is one case where doctest +works hard to be flexible in what it accepts. Simple example: @@ -613,6 +616,20 @@ TypeError: object doesn't support item assignment \end{datadesc} +\begin{datadesc}{SKIP} + + When specified, do not run the example at all. This can be useful + in contexts where doctest examples serve as both documentation and + test cases, and an example should be included for documentation + purposes, but should not be checked. E.g., the example's output + might be random; or the example might depend on resources which + would be unavailable to the test driver. + + The SKIP flag can also be used for temporarily "commenting out" + examples. + +\end{datadesc} + \begin{datadesc}{COMPARISON_FLAGS} A bitmask or'ing together all the comparison flags above. \end{datadesc} @@ -741,6 +758,7 @@ can be useful. 
were added; by default \code{<BLANKLINE>} in expected output matches an empty line in actual output; and doctest directives were added]{2.4} +\versionchanged[Constant \constant{SKIP} was added]{2.5} There's also a way to register new option flag names, although this isn't useful unless you intend to extend \refmodule{doctest} internals @@ -1040,7 +1058,11 @@ runner.run(suite) There are two main functions for creating \class{\refmodule{unittest}.TestSuite} instances from text files and modules with doctests: -\begin{funcdesc}{DocFileSuite}{*paths, **kw} +\begin{funcdesc}{DocFileSuite}{\optional{module_relative}\optional{, + package}\optional{, setUp}\optional{, + tearDown}\optional{, globs}\optional{, + optionflags}\optional{, parser}} + Convert doctest tests from one or more text files to a \class{\refmodule{unittest}.TestSuite}. @@ -1108,9 +1130,9 @@ instances from text files and modules with doctests: \versionadded{2.4} - Starting in Python 2.5, the global \code{__file__} was added to the + \versionchanged[The global \code{__file__} was added to the globals provided to doctests loaded from a text file using - \function{DocFileSuite()}. + \function{DocFileSuite()}]{2.5} \end{funcdesc} \begin{funcdesc}{DocTestSuite}{\optional{module}\optional{, diff --git a/Doc/lib/libexcs.tex b/Doc/lib/libexcs.tex index 85058a4..30fe831 100644 --- a/Doc/lib/libexcs.tex +++ b/Doc/lib/libexcs.tex @@ -80,7 +80,6 @@ text message explaining why the exception had been raised. If more data needs to be attached to the exception, attach it through arbitrary attributes on the instance. All arguments are also stored in \member{args} as a tuple, but it will eventually be deprecated and thus its use is discouraged. -\versionchanged[Changed to inherit from \exception{BaseException}]{2.5} \versionadded{2.5} \end{excdesc} @@ -88,6 +87,7 @@ eventually be deprecated and thus its use is discouraged. All built-in, non-system-exiting exceptions are derived from this class. 
All user-defined exceptions should also be derived from this class. +\versionchanged[Changed to inherit from \exception{BaseException}]{2.5} \end{excdesc} \begin{excdesc}{StandardError} @@ -394,11 +394,15 @@ Raised when an \keyword{assert} statement fails. \begin{excdesc}{WindowsError} Raised when a Windows-specific error occurs or when the error number does not correspond to an \cdata{errno} value. The - \member{errno} and \member{strerror} values are created from the + \member{winerror} and \member{strerror} values are created from the return values of the \cfunction{GetLastError()} and \cfunction{FormatMessage()} functions from the Windows Platform API. + The \member{errno} value maps the \member{winerror} value to + corresponding \code{errno.h} values. This is a subclass of \exception{OSError}. \versionadded{2.0} +\versionchanged[Previous versions put the \cfunction{GetLastError()} +codes into \member{errno}]{2.5} \end{excdesc} \begin{excdesc}{ZeroDivisionError} @@ -442,6 +446,11 @@ Base class for warnings about constructs that will change semantically in the future. \end{excdesc} +\begin{excdesc}{ImportWarning} +Base class for warnings about probable mistakes in module imports. +\versionadded{2.5} +\end{excdesc} + The class hierarchy for built-in exceptions is: \verbatiminput{../../Lib/test/exception_hierarchy.txt} diff --git a/Doc/lib/libfuncs.tex b/Doc/lib/libfuncs.tex index c0352d3..8904d5f 100644 --- a/Doc/lib/libfuncs.tex +++ b/Doc/lib/libfuncs.tex @@ -418,7 +418,7 @@ class C: that differentiate between binary and text files (else it is ignored). If the file cannot be opened, \exception{IOError} is raised. - + In addition to the standard \cfunction{fopen()} values \var{mode} may be \code{'U'} or \code{'rU'}. If Python is built with universal newline support (the default) the file is opened as a text file, but @@ -434,6 +434,9 @@ class C: have yet been seen), \code{'\e n'}, \code{'\e r'}, \code{'\e r\e n'}, or a tuple containing all the newline types seen. 
+ Python enforces that the mode, after stripping \code{'U'}, begins with + \code{'r'}, \code{'w'} or \code{'a'}. + If \var{mode} is omitted, it defaults to \code{'r'}. When opening a binary file, you should append \code{'b'} to the \var{mode} value for improved portability. (It's useful even on systems which don't @@ -455,12 +458,10 @@ class C: after any I/O has been performed, and there's no reliable way to determine whether this is the case.} - The \function{file()} constructor is new in Python 2.2 and is an - alias for \function{open()}. Both spellings are equivalent. The - intent is for \function{open()} to continue to be preferred for use - as a factory function which returns a new \class{file} object. The - spelling, \class{file} is more suited to type testing (for example, - writing \samp{isinstance(f, file)}). + \versionadded{2.2} + + \versionchanged[Restriction on first letter of mode string + introduced]{2.5} \end{funcdesc} \begin{funcdesc}{filter}{function, list} @@ -708,7 +709,10 @@ class C: \end{funcdesc} \begin{funcdesc}{open}{filename\optional{, mode\optional{, bufsize}}} - An alias for the \function{file()} function above. + A wrapper for the \function{file()} function above. The intent is + for \function{open()} to be preferred for use as a factory function + returning a new \class{file} object. \class{file} is more suited to + type testing (for example, writing \samp{isinstance(f, file)}). \end{funcdesc} \begin{funcdesc}{ord}{c} diff --git a/Doc/lib/libgetpass.tex b/Doc/lib/libgetpass.tex index 1d177d3..a742439 100644 --- a/Doc/lib/libgetpass.tex +++ b/Doc/lib/libgetpass.tex @@ -19,7 +19,7 @@ The \module{getpass} module provides two functions: \code{sys.stdout} (this argument is ignored on Windows). Availability: Macintosh, \UNIX, Windows. 
- \versionadded[The \var{stream} parameter]{2.5} + \versionchanged[The \var{stream} parameter was added]{2.5} \end{funcdesc} diff --git a/Doc/lib/libhtmlparser.tex b/Doc/lib/libhtmlparser.tex index b85ba56..52f8409 100644 --- a/Doc/lib/libhtmlparser.tex +++ b/Doc/lib/libhtmlparser.tex @@ -132,7 +132,7 @@ implementation does nothing. \begin{methoddesc}{handle_decl}{decl} Method called when an SGML declaration is read by the parser. The \var{decl} parameter will be the entire contents of the declaration -inside the \code{<!}...\code{>} markup.It is intended to be overridden +inside the \code{<!}...\code{>} markup. It is intended to be overridden by a derived class; the base class implementation does nothing. \end{methoddesc} diff --git a/Doc/lib/liblocale.tex b/Doc/lib/liblocale.tex index e6ba2c1..688ccb0 100644 --- a/Doc/lib/liblocale.tex +++ b/Doc/lib/liblocale.tex @@ -61,7 +61,7 @@ locale.setlocale(locale.LC_ALL, '') Returns the database of the local conventions as a dictionary. This dictionary has the following strings as keys: - \begin{tableiii}{l|l|p{3in}}{constant}{Key}{Category}{Meaning} + \begin{tableiii}{l|l|p{3in}}{constant}{Category}{Key}{Meaning} \lineiii{LC_NUMERIC}{\code{'decimal_point'}} {Decimal point character.} \lineiii{}{\code{'grouping'}} @@ -76,8 +76,20 @@ locale.setlocale(locale.LC_ALL, '') {International currency symbol.} \lineiii{}{\code{'currency_symbol'}} {Local currency symbol.} + \lineiii{}{\code{'p_cs_precedes/n_cs_precedes'}} + {Whether the currency symbol precedes the value (for positive resp. + negative values).} + \lineiii{}{\code{'p_sep_by_space/n_sep_by_space'}} + {Whether the currency symbol is separated from the value + by a space (for positive resp. 
negative values).} \lineiii{}{\code{'mon_decimal_point'}} {Decimal point used for monetary values.} + \lineiii{}{\code{'frac_digits'}} + {Number of fractional digits used in local formatting + of monetary values.} + \lineiii{}{\code{'int_frac_digits'}} + {Number of fractional digits used in international + formatting of monetary values.} \lineiii{}{\code{'mon_thousands_sep'}} {Group separator used for monetary values.} \lineiii{}{\code{'mon_grouping'}} @@ -87,13 +99,12 @@ locale.setlocale(locale.LC_ALL, '') {Symbol used to annotate a positive monetary value.} \lineiii{}{\code{'negative_sign'}} {Symbol used to annotate a negative monetary value.} - \lineiii{}{\code{'frac_digits'}} - {Number of fractional digits used in local formatting - of monetary values.} - \lineiii{}{\code{'int_frac_digits'}} - {Number of fractional digits used in international - formatting of monetary values.} + \lineiii{}{\code{'p_sign_posn/n_sign_posn'}} + {The position of the sign (for positive resp. negative values), see below.} \end{tableiii} + + All numeric values can be set to \constant{CHAR_MAX} to indicate that + there is no value specified in this locale. The possible values for \code{'p_sign_posn'} and \code{'n_sign_posn'} are given below. @@ -104,7 +115,7 @@ locale.setlocale(locale.LC_ALL, '') \lineii{2}{The sign should follow the value and currency symbol.} \lineii{3}{The sign should immediately precede the value.} \lineii{4}{The sign should immediately follow the value.} - \lineii{\constant{LC_MAX}}{Nothing is specified in this locale.} + \lineii{\constant{CHAR_MAX}}{Nothing is specified in this locale.} \end{tableii} \end{funcdesc} @@ -206,12 +217,44 @@ for which symbolic constants are available in the locale module. strings. \end{funcdesc} -\begin{funcdesc}{format}{format, val\optional{, grouping}} +\begin{funcdesc}{format}{format, val\optional{, grouping\optional{, monetary}}} Formats a number \var{val} according to the current \constant{LC_NUMERIC} setting. 
The format follows the conventions of the \code{\%} operator. For floating point values, the decimal point is modified if appropriate. If \var{grouping} is true, also takes the grouping into account. + + If \var{monetary} is true, the conversion uses monetary thousands + separator and grouping strings. + + Please note that this function will only work for exactly one \%char + specifier. For whole format strings, use \function{format_string()}. + + \versionchanged[Added the \var{monetary} parameter]{2.5} +\end{funcdesc} + +\begin{funcdesc}{format_string}{format, val\optional{, grouping}} + Processes formatting specifiers as in \code{format \% val}, + but takes the current locale settings into account. + + \versionadded{2.5} +\end{funcdesc} + +\begin{funcdesc}{currency}{val\optional{, symbol\optional{, grouping\optional{, international}}}} + Formats a number \var{val} according to the current \constant{LC_MONETARY} + settings. + + The returned string includes the currency symbol if \var{symbol} is true, + which is the default. + If \var{grouping} is true (which is not the default), grouping is done with + the value. + If \var{international} is true (which is not the default), the international + currency symbol is used. + + Note that this function will not work with the `C' locale, so you have to set + a locale via \function{setlocale()} first. 
+ + \versionadded{2.5} \end{funcdesc} \begin{funcdesc}{str}{float} diff --git a/Doc/lib/libmailbox.tex b/Doc/lib/libmailbox.tex index dd18562..0a1f792 100644 --- a/Doc/lib/libmailbox.tex +++ b/Doc/lib/libmailbox.tex @@ -1,12 +1,1253 @@ \section{\module{mailbox} --- - Read various mailbox formats} + Manipulate mailboxes in various formats} -\declaremodule{standard}{mailbox} -\modulesynopsis{Read various mailbox formats.} +\declaremodule{}{mailbox} +\moduleauthor{Gregory K.~Johnson}{gkj@gregorykjohnson.com} +\sectionauthor{Gregory K.~Johnson}{gkj@gregorykjohnson.com} +\modulesynopsis{Manipulate mailboxes in various formats} -This module defines a number of classes that allow easy and uniform -access to mail messages in a (\UNIX) mailbox. +This module defines two classes, \class{Mailbox} and \class{Message}, for +accessing and manipulating on-disk mailboxes and the messages they contain. +\class{Mailbox} offers a dictionary-like mapping from keys to messages. +\class{Message} extends the \module{email.Message} module's \class{Message} +class with format-specific state and behavior. Supported mailbox formats are +Maildir, mbox, MH, Babyl, and MMDF. + +\begin{seealso} + \seemodule{email}{Represent and manipulate messages.} +\end{seealso} + +\subsection{\class{Mailbox} objects} +\label{mailbox-objects} + +\begin{classdesc*}{Mailbox} +A mailbox, which may be inspected and modified. +\end{classdesc*} + +The \class{Mailbox} interface is dictionary-like, with small keys +corresponding to messages. Keys are issued by the \class{Mailbox} instance +with which they will be used and are only meaningful to that \class{Mailbox} +instance. A key continues to identify a message even if the corresponding +message is modified, such as by replacing it with another message. Messages may +be added to a \class{Mailbox} instance using the set-like method +\method{add()} and removed using a \code{del} statement or the set-like methods +\method{remove()} and \method{discard()}. 
+ +\class{Mailbox} interface semantics differ from dictionary semantics in some +noteworthy ways. Each time a message is requested, a new representation +(typically a \class{Message} instance) is generated, based upon the current +state of the mailbox. Similarly, when a message is added to a \class{Mailbox} +instance, the provided message representation's contents are copied. In neither +case is a reference to the message representation kept by the \class{Mailbox} +instance. + +The default \class{Mailbox} iterator iterates over message representations, not +keys as the default dictionary iterator does. Moreover, modification of a +mailbox during iteration is safe and well-defined. Messages added to the +mailbox after an iterator is created will not be seen by the iterator. Messages +removed from the mailbox before the iterator yields them will be silently +skipped, though using a key from an iterator may result in a +\exception{KeyError} exception if the corresponding message is subsequently +removed. + +\class{Mailbox} itself is intended to define an interface and to be inherited +from by format-specific subclasses but is not intended to be instantiated. +Instead, you should instantiate a subclass. + +\class{Mailbox} instances have the following methods: + +\begin{methoddesc}{add}{message} +Add \var{message} to the mailbox and return the key that has been assigned to +it. + +Parameter \var{message} may be a \class{Message} instance, an +\class{email.Message.Message} instance, a string, or a file-like object (which +should be open in text mode). If \var{message} is an instance of the +appropriate format-specific \class{Message} subclass (e.g., if it's an +\class{mboxMessage} instance and this is an \class{mbox} instance), its +format-specific information is used. Otherwise, reasonable defaults for +format-specific information are used. 
+\end{methoddesc} + +\begin{methoddesc}{remove}{key} +\methodline{__delitem__}{key} +\methodline{discard}{key} +Delete the message corresponding to \var{key} from the mailbox. + +If no such message exists, a \exception{KeyError} exception is raised if the +method was called as \method{remove()} or \method{__delitem__()} but no +exception is raised if the method was called as \method{discard()}. The +behavior of \method{discard()} may be preferred if the underlying mailbox +format supports concurrent modification by other processes. +\end{methoddesc} + +\begin{methoddesc}{__setitem__}{key, message} +Replace the message corresponding to \var{key} with \var{message}. Raise a +\exception{KeyError} exception if no message already corresponds to \var{key}. + +As with \method{add()}, parameter \var{message} may be a \class{Message} +instance, an \class{email.Message.Message} instance, a string, or a file-like +object (which should be open in text mode). If \var{message} is an instance of +the appropriate format-specific \class{Message} subclass (e.g., if it's an +\class{mboxMessage} instance and this is an \class{mbox} instance), its +format-specific information is used. Otherwise, the format-specific information +of the message that currently corresponds to \var{key} is left unchanged. +\end{methoddesc} + +\begin{methoddesc}{iterkeys}{} +\methodline{keys}{} +Return an iterator over all keys if called as \method{iterkeys()} or return a +list of keys if called as \method{keys()}. +\end{methoddesc} + +\begin{methoddesc}{itervalues}{} +\methodline{__iter__}{} +\methodline{values}{} +Return an iterator over representations of all messages if called as +\method{itervalues()} or \method{__iter__()} or return a list of such +representations if called as \method{values()}. The messages are represented as +instances of the appropriate format-specific \class{Message} subclass unless a +custom message factory was specified when the \class{Mailbox} instance was +initialized. 
\note{The behavior of \method{__iter__()} is unlike that of +dictionaries, which iterate over keys.} +\end{methoddesc} + +\begin{methoddesc}{iteritems}{} +\methodline{items}{} +Return an iterator over (\var{key}, \var{message}) pairs, where \var{key} is a +key and \var{message} is a message representation, if called as +\method{iteritems()} or return a list of such pairs if called as +\method{items()}. The messages are represented as instances of the appropriate +format-specific \class{Message} subclass unless a custom message factory was +specified when the \class{Mailbox} instance was initialized. +\end{methoddesc} + +\begin{methoddesc}{get}{key\optional{, default=None}} +\methodline{__getitem__}{key} +Return a representation of the message corresponding to \var{key}. If no such +message exists, \var{default} is returned if the method was called as +\method{get()} and a \exception{KeyError} exception is raised if the method was +called as \method{__getitem__()}. The message is represented as an instance of +the appropriate format-specific \class{Message} subclass unless a custom +message factory was specified when the \class{Mailbox} instance was +initialized. +\end{methoddesc} + +\begin{methoddesc}{get_message}{key} +Return a representation of the message corresponding to \var{key} as an +instance of the appropriate format-specific \class{Message} subclass, or raise +a \exception{KeyError} exception if no such message exists. +\end{methoddesc} + +\begin{methoddesc}{get_string}{key} +Return a string representation of the message corresponding to \var{key}, or +raise a \exception{KeyError} exception if no such message exists. +\end{methoddesc} + +\begin{methoddesc}{get_file}{key} +Return a file-like representation of the message corresponding to \var{key}, +or raise a \exception{KeyError} exception if no such message exists. The +file-like object behaves as if open in binary mode. This file should be closed +once it is no longer needed. 
+ +\note{Unlike other representations of messages, file-like representations are +not necessarily independent of the \class{Mailbox} instance that created them +or of the underlying mailbox. More specific documentation is provided by each +subclass.} +\end{methoddesc} + +\begin{methoddesc}{has_key}{key} +\methodline{__contains__}{key} +Return \code{True} if \var{key} corresponds to a message, \code{False} +otherwise. +\end{methoddesc} + +\begin{methoddesc}{__len__}{} +Return a count of messages in the mailbox. +\end{methoddesc} + +\begin{methoddesc}{clear}{} +Delete all messages from the mailbox. +\end{methoddesc} + +\begin{methoddesc}{pop}{key\optional{, default}} +Return a representation of the message corresponding to \var{key} and delete +the message. If no such message exists, return \var{default} if it was supplied +or else raise a \exception{KeyError} exception. The message is represented as +an instance of the appropriate format-specific \class{Message} subclass unless +a custom message factory was specified when the \class{Mailbox} instance was +initialized. +\end{methoddesc} + +\begin{methoddesc}{popitem}{} +Return an arbitrary (\var{key}, \var{message}) pair, where \var{key} is a key +and \var{message} is a message representation, and delete the corresponding +message. If the mailbox is empty, raise a \exception{KeyError} exception. The +message is represented as an instance of the appropriate format-specific +\class{Message} subclass unless a custom message factory was specified when the +\class{Mailbox} instance was initialized. +\end{methoddesc} + +\begin{methoddesc}{update}{arg} +Parameter \var{arg} should be a \var{key}-to-\var{message} mapping or an +iterable of (\var{key}, \var{message}) pairs. Updates the mailbox so that, for +each given \var{key} and \var{message}, the message corresponding to \var{key} +is set to \var{message} as if by using \method{__setitem__()}. 
As with +\method{__setitem__()}, each \var{key} must already correspond to a message in +the mailbox or else a \exception{KeyError} exception will be raised, so in +general it is incorrect for \var{arg} to be a \class{Mailbox} instance. +\note{Unlike with dictionaries, keyword arguments are not supported.} +\end{methoddesc} + +\begin{methoddesc}{flush}{} +Write any pending changes to the filesystem. For some \class{Mailbox} +subclasses, changes are always written immediately and this method does +nothing. +\end{methoddesc} + +\begin{methoddesc}{lock}{} +Acquire an exclusive advisory lock on the mailbox so that other processes know +not to modify it. An \exception{ExternalClashError} is raised if the lock is +not available. The particular locking mechanisms used depend upon the mailbox +format. +\end{methoddesc} + +\begin{methoddesc}{unlock}{} +Release the lock on the mailbox, if any. +\end{methoddesc} + +\begin{methoddesc}{close}{} +Flush the mailbox, unlock it if necessary, and close any open files. For some +\class{Mailbox} subclasses, this method does nothing. +\end{methoddesc} + + +\subsubsection{\class{Maildir}} +\label{mailbox-maildir} + +\begin{classdesc}{Maildir}{dirname\optional{, factory=rfc822.Message\optional{, +create=True}}} +A subclass of \class{Mailbox} for mailboxes in Maildir format. Parameter +\var{factory} is a callable object that accepts a file-like message +representation (which behaves as if opened in binary mode) and returns a custom +representation. If \var{factory} is \code{None}, \class{MaildirMessage} is used +as the default message representation. If \var{create} is \code{True}, the +mailbox is created if it does not exist. + +It is for historical reasons that \var{factory} defaults to +\class{rfc822.Message} and that \var{dirname} is named as such rather than +\var{path}. For a \class{Maildir} instance that behaves like instances of other +\class{Mailbox} subclasses, set \var{factory} to \code{None}. 
+\end{classdesc} + +Maildir is a directory-based mailbox format invented for the qmail mail +transfer agent and now widely supported by other programs. Messages in a +Maildir mailbox are stored in separate files within a common directory +structure. This design allows Maildir mailboxes to be accessed and modified by +multiple unrelated programs without data corruption, so file locking is +unnecessary. + +Maildir mailboxes contain three subdirectories, namely: \file{tmp}, \file{new}, +and \file{cur}. Messages are created momentarily in the \file{tmp} subdirectory +and then moved to the \file{new} subdirectory to finalize delivery. A mail user +agent may subsequently move the message to the \file{cur} subdirectory and +store information about the state of the message in a special "info" section +appended to its file name. + +Folders of the style introduced by the Courier mail transfer agent are also +supported. Any subdirectory of the main mailbox is considered a folder if +\character{.} is the first character in its name. Folder names are represented +by \class{Maildir} without the leading \character{.}. Each folder is itself a +Maildir mailbox but should not contain other folders. Instead, a logical +nesting is indicated using \character{.} to delimit levels, e.g., +"Archived.2005.07". + +\begin{notice} +The Maildir specification requires the use of a colon (\character{:}) in +certain message file names. However, some operating systems do not permit this +character in file names. If you wish to use a Maildir-like format on such an +operating system, you should specify another character to use instead. The +exclamation point (\character{!}) is a popular choice. For example: +\begin{verbatim} +import mailbox +mailbox.Maildir.colon = '!' +\end{verbatim} +The \member{colon} attribute may also be set on a per-instance basis.
+\end{notice} + +\class{Maildir} instances have all of the methods of \class{Mailbox} in +addition to the following: + +\begin{methoddesc}{list_folders}{} +Return a list of the names of all folders. +\end{methoddesc} + +\begin{methoddesc}{get_folder}{folder} +Return a \class{Maildir} instance representing the folder whose name is +\var{folder}. A \exception{NoSuchMailboxError} exception is raised if the +folder does not exist. +\end{methoddesc} + +\begin{methoddesc}{add_folder}{folder} +Create a folder whose name is \var{folder} and return a \class{Maildir} +instance representing it. +\end{methoddesc} + +\begin{methoddesc}{remove_folder}{folder} +Delete the folder whose name is \var{folder}. If the folder contains any +messages, a \exception{NotEmptyError} exception will be raised and the folder +will not be deleted. +\end{methoddesc} + +\begin{methoddesc}{clean}{} +Delete temporary files from the mailbox that have not been accessed in the +last 36 hours. The Maildir specification says that mail-reading programs +should do this occasionally. +\end{methoddesc} + +Some \class{Mailbox} methods implemented by \class{Maildir} deserve special +remarks: + +\begin{methoddesc}{add}{message} +\methodline[Maildir]{__setitem__}{key, message} +\methodline[Maildir]{update}{arg} +\warning{These methods generate unique file names based upon the current +process ID. When using multiple threads, undetected name clashes may occur and +cause corruption of the mailbox unless threads are coordinated to avoid using +these methods to manipulate the same mailbox simultaneously.} +\end{methoddesc} + +\begin{methoddesc}{flush}{} +All changes to Maildir mailboxes are immediately applied, so this method does +nothing. +\end{methoddesc} + +\begin{methoddesc}{lock}{} +\methodline{unlock}{} +Maildir mailboxes do not support (or require) locking, so these methods do +nothing. 
+\end{methoddesc} + +\begin{methoddesc}{close}{} +\class{Maildir} instances do not keep any open files and the underlying +mailboxes do not support locking, so this method does nothing. +\end{methoddesc} + +\begin{methoddesc}{get_file}{key} +Depending upon the host platform, it may not be possible to modify or remove +the underlying message while the returned file remains open. +\end{methoddesc} + +\begin{seealso} + \seelink{http://www.qmail.org/man/man5/maildir.html}{maildir man page from + qmail}{The original specification of the format.} + \seelink{http://cr.yp.to/proto/maildir.html}{Using maildir format}{Notes + on Maildir by its inventor. Includes an updated name-creation scheme and + details on "info" semantics.} + \seelink{http://www.courier-mta.org/?maildir.html}{maildir man page from + Courier}{Another specification of the format. Describes a common extension + for supporting folders.} +\end{seealso} + +\subsubsection{\class{mbox}} +\label{mailbox-mbox} + +\begin{classdesc}{mbox}{path\optional{, factory=None\optional{, create=True}}} +A subclass of \class{Mailbox} for mailboxes in mbox format. Parameter +\var{factory} is a callable object that accepts a file-like message +representation (which behaves as if opened in binary mode) and returns a custom +representation. If \var{factory} is \code{None}, \class{mboxMessage} is used as +the default message representation. If \var{create} is \code{True}, the mailbox +is created if it does not exist. +\end{classdesc} + +The mbox format is the classic format for storing mail on \UNIX{} systems. All +messages in an mbox mailbox are stored in a single file with the beginning of +each message indicated by a line whose first five characters are "From~". + +Several variations of the mbox format exist to address perceived shortcomings +in the original. In the interest of compatibility, \class{mbox} implements the +original format, which is sometimes referred to as \dfn{mboxo}. 
This means that +the \mailheader{Content-Length} header, if present, is ignored and that any +occurrences of "From~" at the beginning of a line in a message body are +transformed to ">From~" when storing the message, although occurrences of +">From~" are not transformed to "From~" when reading the message. + +Some \class{Mailbox} methods implemented by \class{mbox} deserve special +remarks: + +\begin{methoddesc}{get_file}{key} +Using the file after calling \method{flush()} or \method{close()} on the +\class{mbox} instance may yield unpredictable results or raise an exception. +\end{methoddesc} + +\begin{methoddesc}{lock}{} +\methodline{unlock}{} +Three locking mechanisms are used---dot locking and, if available, the +\cfunction{flock()} and \cfunction{lockf()} system calls. +\end{methoddesc} + +\begin{seealso} + \seelink{http://www.qmail.org/man/man5/mbox.html}{mbox man page from + qmail}{A specification of the format and its variations.} + \seelink{http://www.tin.org/bin/man.cgi?section=5\&topic=mbox}{mbox man + page from tin}{Another specification of the format, with details on + locking.} + \seelink{http://home.netscape.com/eng/mozilla/2.0/relnotes/demo/content-length.html} + {Configuring Netscape Mail on \UNIX{}: Why The Content-Length Format is + Bad}{An argument for using the original mbox format rather than a + variation.} + \seelink{http://homepages.tesco.net./\tilde{}J.deBoynePollard/FGA/mail-mbox-formats.html} + {"mbox" is a family of several mutually incompatible mailbox formats}{A + history of mbox variations.} +\end{seealso} + +\subsubsection{\class{MH}} +\label{mailbox-mh} + +\begin{classdesc}{MH}{path\optional{, factory=None\optional{, create=True}}} +A subclass of \class{Mailbox} for mailboxes in MH format. Parameter +\var{factory} is a callable object that accepts a file-like message +representation (which behaves as if opened in binary mode) and returns a custom +representation.
If \var{factory} is \code{None}, \class{MHMessage} is used as +the default message representation. If \var{create} is \code{True}, the mailbox +is created if it does not exist. +\end{classdesc} + +MH is a directory-based mailbox format invented for the MH Message Handling +System, a mail user agent. Each message in an MH mailbox resides in its own +file. An MH mailbox may contain other MH mailboxes (called \dfn{folders}) in +addition to messages. Folders may be nested indefinitely. MH mailboxes also +support \dfn{sequences}, which are named lists used to logically group messages +without moving them to sub-folders. Sequences are defined in a file called +\file{.mh_sequences} in each folder. + +The \class{MH} class manipulates MH mailboxes, but it does not attempt to +emulate all of \program{mh}'s behaviors. In particular, it does not modify and +is not affected by the \file{context} or \file{.mh_profile} files that are used +by \program{mh} to store its state and configuration. + +\class{MH} instances have all of the methods of \class{Mailbox} in addition to +the following: + +\begin{methoddesc}{list_folders}{} +Return a list of the names of all folders. +\end{methoddesc} + +\begin{methoddesc}{get_folder}{folder} +Return an \class{MH} instance representing the folder whose name is +\var{folder}. A \exception{NoSuchMailboxError} exception is raised if the +folder does not exist. +\end{methoddesc} + +\begin{methoddesc}{add_folder}{folder} +Create a folder whose name is \var{folder} and return an \class{MH} instance +representing it. +\end{methoddesc} + +\begin{methoddesc}{remove_folder}{folder} +Delete the folder whose name is \var{folder}. If the folder contains any +messages, a \exception{NotEmptyError} exception will be raised and the folder +will not be deleted. +\end{methoddesc} + +\begin{methoddesc}{get_sequences}{} +Return a dictionary of sequence names mapped to key lists. If there are no +sequences, the empty dictionary is returned. 
+\end{methoddesc} + +\begin{methoddesc}{set_sequences}{sequences} +Re-define the sequences that exist in the mailbox based upon \var{sequences}, a +dictionary of names mapped to key lists, as returned by +\method{get_sequences()}. +\end{methoddesc} + +\begin{methoddesc}{pack}{} +Rename messages in the mailbox as necessary to eliminate gaps in numbering. +Entries in the sequences list are updated correspondingly. \note{Already-issued +keys are invalidated by this operation and should not be subsequently used.} +\end{methoddesc} + +Some \class{Mailbox} methods implemented by \class{MH} deserve special remarks: + +\begin{methoddesc}{remove}{key} +\methodline{__delitem__}{key} +\methodline{discard}{key} +These methods immediately delete the message. The MH convention of marking a +message for deletion by prepending a comma to its name is not used. +\end{methoddesc} + +\begin{methoddesc}{lock}{} +\methodline{unlock}{} +Three locking mechanisms are used---dot locking and, if available, the +\cfunction{flock()} and \cfunction{lockf()} system calls. For MH mailboxes, +locking the mailbox means locking the \file{.mh_sequences} file and, only for +the duration of any operations that affect them, locking individual message +files. +\end{methoddesc} + +\begin{methoddesc}{get_file}{key} +Depending upon the host platform, it may not be possible to remove the +underlying message while the returned file remains open. +\end{methoddesc} + +\begin{methoddesc}{flush}{} +All changes to MH mailboxes are immediately applied, so this method does +nothing. +\end{methoddesc} + +\begin{methoddesc}{close}{} +\class{MH} instances do not keep any open files, so this method is equivalent +to \method{unlock()}.
+\end{methoddesc} + +\begin{seealso} +\seelink{http://www.nongnu.org/nmh/}{nmh - Message Handling System}{Home page +of \program{nmh}, an updated version of the original \program{mh}.} +\seelink{http://www.ics.uci.edu/\tilde{}mh/book/}{MH \& nmh: Email for Users \& +Programmers}{A GPL-licensed book on \program{mh} and \program{nmh}, with some +information on the mailbox format.} +\end{seealso} + +\subsubsection{\class{Babyl}} +\label{mailbox-babyl} + +\begin{classdesc}{Babyl}{path\optional{, factory=None\optional{, create=True}}} +A subclass of \class{Mailbox} for mailboxes in Babyl format. Parameter +\var{factory} is a callable object that accepts a file-like message +representation (which behaves as if opened in binary mode) and returns a custom +representation. If \var{factory} is \code{None}, \class{BabylMessage} is used +as the default message representation. If \var{create} is \code{True}, the +mailbox is created if it does not exist. +\end{classdesc} + +Babyl is a single-file mailbox format used by the Rmail mail user agent +included with Emacs. The beginning of a message is indicated by a line +containing the two characters Control-Underscore +(\character{\textbackslash037}) and Control-L (\character{\textbackslash014}). +The end of a message is indicated by the start of the next message or, in the +case of the last message, a line containing a Control-Underscore +(\character{\textbackslash037}) character. + +Messages in a Babyl mailbox have two sets of headers, original headers and +so-called visible headers. Visible headers are typically a subset of the +original headers that have been reformatted or abridged to be more attractive. +Each message in a Babyl mailbox also has an accompanying list of \dfn{labels}, +or short strings that record extra information about the message, and a list of +all user-defined labels found in the mailbox is kept in the Babyl options +section. 
+ +\class{Babyl} instances have all of the methods of \class{Mailbox} in addition +to the following: + +\begin{methoddesc}{get_labels}{} +Return a list of the names of all user-defined labels used in the mailbox. +\note{The actual messages are inspected to determine which labels exist in the +mailbox rather than consulting the list of labels in the Babyl options section, +but the Babyl section is updated whenever the mailbox is modified.} +\end{methoddesc} + +Some \class{Mailbox} methods implemented by \class{Babyl} deserve special +remarks: + +\begin{methoddesc}{get_file}{key} +In Babyl mailboxes, the headers of a message are not stored contiguously with +the body of the message. To generate a file-like representation, the headers +and body are copied together into a \class{StringIO} instance (from the +\module{StringIO} module), which has an API identical to that of a file. As a +result, the file-like object is truly independent of the underlying mailbox but +does not save memory compared to a string representation. +\end{methoddesc} + +\begin{methoddesc}{lock}{} +\methodline{unlock}{} +Three locking mechanisms are used---dot locking and, if available, the +\cfunction{flock()} and \cfunction{lockf()} system calls. +\end{methoddesc} + +\begin{seealso} +\seelink{http://quimby.gnus.org/notes/BABYL}{Format of Version 5 Babyl Files}{A +specification of the Babyl format.} +\seelink{http://www.gnu.org/software/emacs/manual/html_node/Rmail.html}{Reading +Mail with Rmail}{The Rmail manual, with some information on Babyl semantics.} +\end{seealso} + +\subsubsection{\class{MMDF}} +\label{mailbox-mmdf} + +\begin{classdesc}{MMDF}{path\optional{, factory=None\optional{, create=True}}} +A subclass of \class{Mailbox} for mailboxes in MMDF format. Parameter +\var{factory} is a callable object that accepts a file-like message +representation (which behaves as if opened in binary mode) and returns a custom +representation. 
If \var{factory} is \code{None}, \class{MMDFMessage} is used as +the default message representation. If \var{create} is \code{True}, the mailbox +is created if it does not exist. +\end{classdesc} + +MMDF is a single-file mailbox format invented for the Multichannel Memorandum +Distribution Facility, a mail transfer agent. Each message is in the same form +as an mbox message but is bracketed before and after by lines containing four +Control-A (\character{\textbackslash001}) characters. As with the mbox format, +the beginning of each message is indicated by a line whose first five +characters are "From~", but additional occurrences of "From~" are not +transformed to ">From~" when storing messages because the extra message +separator lines prevent mistaking such occurrences for the starts of subsequent +messages. + +Some \class{Mailbox} methods implemented by \class{MMDF} deserve special +remarks: + +\begin{methoddesc}{get_file}{key} +Using the file after calling \method{flush()} or \method{close()} on the +\class{MMDF} instance may yield unpredictable results or raise an exception. +\end{methoddesc} + +\begin{methoddesc}{lock}{} +\methodline{unlock}{} +Three locking mechanisms are used---dot locking and, if available, the +\cfunction{flock()} and \cfunction{lockf()} system calls. +\end{methoddesc} + +\begin{seealso} +\seelink{http://www.tin.org/bin/man.cgi?section=5\&topic=mmdf}{mmdf man page +from tin}{A specification of MMDF format from the documentation of tin, a +newsreader.} +\seelink{http://en.wikipedia.org/wiki/MMDF}{MMDF}{A Wikipedia article +describing the Multichannel Memorandum Distribution Facility.} +\end{seealso} + +\subsection{\class{Message} objects} +\label{mailbox-message-objects} + +\begin{classdesc}{Message}{\optional{message}} +A subclass of the \module{email.Message} module's \class{Message}. Subclasses +of \class{mailbox.Message} add mailbox-format-specific state and behavior. 
+ +If \var{message} is omitted, the new instance is created in a default, empty +state. If \var{message} is an \class{email.Message.Message} instance, its +contents are copied; furthermore, any format-specific information is converted +insofar as possible if \var{message} is a \class{Message} instance. If +\var{message} is a string or a file, it should contain an \rfc{2822}-compliant +message, which is read and parsed. +\end{classdesc} + +The format-specific state and behaviors offered by subclasses vary, but in +general it is only the properties that are not specific to a particular mailbox +that are supported (although presumably the properties are specific to a +particular mailbox format). For example, file offsets for single-file mailbox +formats and file names for directory-based mailbox formats are not retained, +because they are only applicable to the original mailbox. But state such as +whether a message has been read by the user or marked as important is retained, +because it applies to the message itself. + +There is no requirement that \class{Message} instances be used to represent +messages retrieved using \class{Mailbox} instances. In some situations, the +time and memory required to generate \class{Message} representations might not +be acceptable. For such situations, \class{Mailbox} instances also offer +string and file-like representations, and a custom message factory may be +specified when a \class{Mailbox} instance is initialized. + +\subsubsection{\class{MaildirMessage}} +\label{mailbox-maildirmessage} + +\begin{classdesc}{MaildirMessage}{\optional{message}} +A message with Maildir-specific behaviors. Parameter \var{message} +has the same meaning as with the \class{Message} constructor.
+\end{classdesc} + +Typically, a mail user agent application moves all of the messages in the +\file{new} subdirectory to the \file{cur} subdirectory after the first time the +user opens and closes the mailbox, recording that the messages are old whether +or not they've actually been read. Each message in \file{cur} has an "info" +section added to its file name to store information about its state. (Some mail +readers may also add an "info" section to messages in \file{new}.) The "info" +section may take one of two forms: it may contain "2," followed by a list of +standardized flags (e.g., "2,FR") or it may contain "1," followed by so-called +experimental information. Standard flags for Maildir messages are as follows: + +\begin{tableiii}{l|l|l}{textrm}{Flag}{Meaning}{Explanation} +\lineiii{D}{Draft}{Under composition} +\lineiii{F}{Flagged}{Marked as important} +\lineiii{P}{Passed}{Forwarded, resent, or bounced} +\lineiii{R}{Replied}{Replied to} +\lineiii{S}{Seen}{Read} +\lineiii{T}{Trashed}{Marked for subsequent deletion} +\end{tableiii} + +\class{MaildirMessage} instances offer the following methods: + +\begin{methoddesc}{get_subdir}{} +Return either "new" (if the message should be stored in the \file{new} +subdirectory) or "cur" (if the message should be stored in the \file{cur} +subdirectory). \note{A message is typically moved from \file{new} to \file{cur} +after its mailbox has been accessed, whether or not the message has been +read. A message \code{msg} has been read if \code{"S" in msg.get_flags()} +is \code{True}.} +\end{methoddesc} + +\begin{methoddesc}{set_subdir}{subdir} +Set the subdirectory the message should be stored in. Parameter \var{subdir} +must be either "new" or "cur". +\end{methoddesc} + +\begin{methoddesc}{get_flags}{} +Return a string specifying the flags that are currently set.
If the message +complies with the standard Maildir format, the result is the concatenation in +alphabetical order of zero or one occurrence of each of \character{D}, +\character{F}, \character{P}, \character{R}, \character{S}, and \character{T}. +The empty string is returned if no flags are set or if "info" contains +experimental semantics. +\end{methoddesc} + +\begin{methoddesc}{set_flags}{flags} +Set the flags specified by \var{flags} and unset all others. +\end{methoddesc} + +\begin{methoddesc}{add_flag}{flag} +Set the flag(s) specified by \var{flag} without changing other flags. To add +more than one flag at a time, \var{flag} may be a string of more than one +character. The current "info" is overwritten whether or not it contains +experimental information rather than +flags. +\end{methoddesc} + +\begin{methoddesc}{remove_flag}{flag} +Unset the flag(s) specified by \var{flag} without changing other flags. To +remove more than one flag at a time, \var{flag} may be a string of more than one +character. If "info" contains experimental information rather than flags, the +current "info" is not modified. +\end{methoddesc} + +\begin{methoddesc}{get_date}{} +Return the delivery date of the message as a floating-point number representing +seconds since the epoch. +\end{methoddesc} + +\begin{methoddesc}{set_date}{date} +Set the delivery date of the message to \var{date}, a floating-point number +representing seconds since the epoch. +\end{methoddesc} + +\begin{methoddesc}{get_info}{} +Return a string containing the "info" for a message. This is useful for +accessing and modifying "info" that is experimental (i.e., not a list of +flags). +\end{methoddesc} + +\begin{methoddesc}{set_info}{info} +Set "info" to \var{info}, which should be a string.
+\end{methoddesc} + +When a \class{MaildirMessage} instance is created based upon an +\class{mboxMessage} or \class{MMDFMessage} instance, the \mailheader{Status} +and \mailheader{X-Status} headers are omitted and the following conversions +take place: + +\begin{tableii}{l|l}{textrm} + {Resulting state}{\class{mboxMessage} or \class{MMDFMessage} state} +\lineii{"cur" subdirectory}{O flag} +\lineii{F flag}{F flag} +\lineii{R flag}{A flag} +\lineii{S flag}{R flag} +\lineii{T flag}{D flag} +\end{tableii} + +When a \class{MaildirMessage} instance is created based upon an +\class{MHMessage} instance, the following conversions take place: + +\begin{tableii}{l|l}{textrm} + {Resulting state}{\class{MHMessage} state} +\lineii{"cur" subdirectory}{"unseen" sequence} +\lineii{"cur" subdirectory and S flag}{no "unseen" sequence} +\lineii{F flag}{"flagged" sequence} +\lineii{R flag}{"replied" sequence} +\end{tableii} + +When a \class{MaildirMessage} instance is created based upon a +\class{BabylMessage} instance, the following conversions take place: + +\begin{tableii}{l|l}{textrm} + {Resulting state}{\class{BabylMessage} state} +\lineii{"cur" subdirectory}{"unseen" label} +\lineii{"cur" subdirectory and S flag}{no "unseen" label} +\lineii{P flag}{"forwarded" or "resent" label} +\lineii{R flag}{"answered" label} +\lineii{T flag}{"deleted" label} +\end{tableii} + +\subsubsection{\class{mboxMessage}} +\label{mailbox-mboxmessage} + +\begin{classdesc}{mboxMessage}{\optional{message}} +A message with mbox-specific behaviors. Parameter \var{message} has the same +meaning as with the \class{Message} constructor. +\end{classdesc} + +Messages in an mbox mailbox are stored together in a single file. The sender's +envelope address and the time of delivery are typically stored in a line +beginning with "From~" that is used to indicate the start of a message, though +there is considerable variation in the exact format of this data among mbox +implementations. 
Flags that indicate the state of the message, such as whether +it has been read or marked as important, are typically stored in +\mailheader{Status} and \mailheader{X-Status} headers. + +Conventional flags for mbox messages are as follows: + +\begin{tableiii}{l|l|l}{textrm}{Flag}{Meaning}{Explanation} +\lineiii{R}{Read}{Read} +\lineiii{O}{Old}{Previously detected by MUA} +\lineiii{D}{Deleted}{Marked for subsequent deletion} +\lineiii{F}{Flagged}{Marked as important} +\lineiii{A}{Answered}{Replied to} +\end{tableiii} + +The "R" and "O" flags are stored in the \mailheader{Status} header, and the +"D", "F", and "A" flags are stored in the \mailheader{X-Status} header. The +flags and headers typically appear in the order mentioned. + +\class{mboxMessage} instances offer the following methods: + +\begin{methoddesc}{get_from}{} +Return a string representing the "From~" line that marks the start of the +message in an mbox mailbox. The leading "From~" and the trailing newline are +excluded. +\end{methoddesc} + +\begin{methoddesc}{set_from}{from_\optional{, time_=None}} +Set the "From~" line to \var{from_}, which should be specified without a +leading "From~" or trailing newline. For convenience, \var{time_} may be +specified and will be formatted appropriately and appended to \var{from_}. If +\var{time_} is specified, it should be a \class{struct_time} instance, a tuple +suitable for passing to \method{time.strftime()}, or \code{True} (to use +\method{time.gmtime()}). +\end{methoddesc} + +\begin{methoddesc}{get_flags}{} +Return a string specifying the flags that are currently set. If the message +complies with the conventional format, the result is the concatenation in the +following order of zero or one occurrence of each of \character{R}, +\character{O}, \character{D}, \character{F}, and \character{A}. +\end{methoddesc} + +\begin{methoddesc}{set_flags}{flags} +Set the flags specified by \var{flags} and unset all others. 
Parameter +\var{flags} should be the concatenation in any order of zero or more +occurrences of each of \character{R}, \character{O}, \character{D}, +\character{F}, and \character{A}. +\end{methoddesc} + +\begin{methoddesc}{add_flag}{flag} +Set the flag(s) specified by \var{flag} without changing other flags. To add +more than one flag at a time, \var{flag} may be a string of more than one +character. +\end{methoddesc} + +\begin{methoddesc}{remove_flag}{flag} +Unset the flag(s) specified by \var{flag} without changing other flags. To +remove more than one flag at a time, \var{flag} may be a string of more than one +character. +\end{methoddesc} + +When an \class{mboxMessage} instance is created based upon a +\class{MaildirMessage} instance, a "From~" line is generated based upon the +\class{MaildirMessage} instance's delivery date, and the following conversions +take place: + +\begin{tableii}{l|l}{textrm} + {Resulting state}{\class{MaildirMessage} state} +\lineii{R flag}{S flag} +\lineii{O flag}{"cur" subdirectory} +\lineii{D flag}{T flag} +\lineii{F flag}{F flag} +\lineii{A flag}{R flag} +\end{tableii} + +When an \class{mboxMessage} instance is created based upon an \class{MHMessage} +instance, the following conversions take place: + +\begin{tableii}{l|l}{textrm} + {Resulting state}{\class{MHMessage} state} +\lineii{R flag and O flag}{no "unseen" sequence} +\lineii{O flag}{"unseen" sequence} +\lineii{F flag}{"flagged" sequence} +\lineii{A flag}{"replied" sequence} +\end{tableii} + +When an \class{mboxMessage} instance is created based upon a +\class{BabylMessage} instance, the following conversions take place: + +\begin{tableii}{l|l}{textrm} + {Resulting state}{\class{BabylMessage} state} +\lineii{R flag and O flag}{no "unseen" label} +\lineii{O flag}{"unseen" label} +\lineii{D flag}{"deleted" label} +\lineii{A flag}{"answered" label} +\end{tableii} + +When an \class{mboxMessage} instance is created based upon an \class{MMDFMessage} +instance, the "From~" line is copied 
and all flags directly correspond: + +\begin{tableii}{l|l}{textrm} + {Resulting state}{\class{MMDFMessage} state} +\lineii{R flag}{R flag} +\lineii{O flag}{O flag} +\lineii{D flag}{D flag} +\lineii{F flag}{F flag} +\lineii{A flag}{A flag} +\end{tableii} + +\subsubsection{\class{MHMessage}} +\label{mailbox-mhmessage} + +\begin{classdesc}{MHMessage}{\optional{message}} +A message with MH-specific behaviors. Parameter \var{message} has the same +meaning as with the \class{Message} constructor. +\end{classdesc} + +MH messages do not support marks or flags in the traditional sense, but they do +support sequences, which are logical groupings of arbitrary messages. Some mail +reading programs (although not the standard \program{mh} and \program{nmh}) use +sequences in much the same way flags are used with other formats, as follows: + +\begin{tableii}{l|l}{textrm}{Sequence}{Explanation} +\lineii{unseen}{Not read, but previously detected by MUA} +\lineii{replied}{Replied to} +\lineii{flagged}{Marked as important} +\end{tableii} + +\class{MHMessage} instances offer the following methods: + +\begin{methoddesc}{get_sequences}{} +Return a list of the names of sequences that include this message. +\end{methoddesc} + +\begin{methoddesc}{set_sequences}{sequences} +Set the list of sequences that include this message. +\end{methoddesc} + +\begin{methoddesc}{add_sequence}{sequence} +Add \var{sequence} to the list of sequences that include this message. +\end{methoddesc} + +\begin{methoddesc}{remove_sequence}{sequence} +Remove \var{sequence} from the list of sequences that include this message. 
+\end{methoddesc} + +When an \class{MHMessage} instance is created based upon a +\class{MaildirMessage} instance, the following conversions take place: + +\begin{tableii}{l|l}{textrm} + {Resulting state}{\class{MaildirMessage} state} +\lineii{"unseen" sequence}{no S flag} +\lineii{"replied" sequence}{R flag} +\lineii{"flagged" sequence}{F flag} +\end{tableii} + +When an \class{MHMessage} instance is created based upon an \class{mboxMessage} +or \class{MMDFMessage} instance, the \mailheader{Status} and +\mailheader{X-Status} headers are omitted and the following conversions take +place: + +\begin{tableii}{l|l}{textrm} + {Resulting state}{\class{mboxMessage} or \class{MMDFMessage} state} +\lineii{"unseen" sequence}{no R flag} +\lineii{"replied" sequence}{A flag} +\lineii{"flagged" sequence}{F flag} +\end{tableii} + +When an \class{MHMessage} instance is created based upon a \class{BabylMessage} +instance, the following conversions take place: + +\begin{tableii}{l|l}{textrm} + {Resulting state}{\class{BabylMessage} state} +\lineii{"unseen" sequence}{"unseen" label} +\lineii{"replied" sequence}{"answered" label} +\end{tableii} + +\subsubsection{\class{BabylMessage}} +\label{mailbox-babylmessage} + +\begin{classdesc}{BabylMessage}{\optional{message}} +A message with Babyl-specific behaviors. Parameter \var{message} has the same +meaning as with the \class{Message} constructor. +\end{classdesc} + +Certain message labels, called \dfn{attributes}, are defined by convention to +have special meanings. The attributes are as follows: + +\begin{tableii}{l|l}{textrm}{Label}{Explanation} +\lineii{unseen}{Not read, but previously detected by MUA} +\lineii{deleted}{Marked for subsequent deletion} +\lineii{filed}{Copied to another file or mailbox} +\lineii{answered}{Replied to} +\lineii{forwarded}{Forwarded} +\lineii{edited}{Modified by the user} +\lineii{resent}{Resent} +\end{tableii} + +By default, Rmail displays only +visible headers. 
The \class{BabylMessage} class, though, uses the original +headers because they are more complete. Visible headers may be accessed +explicitly if desired. + +\class{BabylMessage} instances offer the following methods: + +\begin{methoddesc}{get_labels}{} +Return a list of labels on the message. +\end{methoddesc} + +\begin{methoddesc}{set_labels}{labels} +Set the list of labels on the message to \var{labels}. +\end{methoddesc} + +\begin{methoddesc}{add_label}{label} +Add \var{label} to the list of labels on the message. +\end{methoddesc} + +\begin{methoddesc}{remove_label}{label} +Remove \var{label} from the list of labels on the message. +\end{methoddesc} + +\begin{methoddesc}{get_visible}{} +Return a \class{Message} instance whose headers are the message's visible +headers and whose body is empty. +\end{methoddesc} + +\begin{methoddesc}{set_visible}{visible} +Set the message's visible headers to be the same as the headers in +\var{visible}. Parameter \var{visible} should be a \class{Message} instance, an +\class{email.Message.Message} instance, a string, or a file-like object (which +should be open in text mode). +\end{methoddesc} + +\begin{methoddesc}{update_visible}{} +When a \class{BabylMessage} instance's original headers are modified, the +visible headers are not automatically modified to correspond. This method +updates the visible headers as follows: each visible header with a +corresponding original header is set to the value of the original header, each +visible header without a corresponding original header is removed, and any of +\mailheader{Date}, \mailheader{From}, \mailheader{Reply-To}, \mailheader{To}, +\mailheader{CC}, and \mailheader{Subject} that are present in the original +headers but not the visible headers are added to the visible headers. 
+\end{methoddesc} + +When a \class{BabylMessage} instance is created based upon a +\class{MaildirMessage} instance, the following conversions take place: + +\begin{tableii}{l|l}{textrm} + {Resulting state}{\class{MaildirMessage} state} +\lineii{"unseen" label}{no S flag} +\lineii{"deleted" label}{T flag} +\lineii{"answered" label}{R flag} +\lineii{"forwarded" label}{P flag} +\end{tableii} + +When a \class{BabylMessage} instance is created based upon an +\class{mboxMessage} or \class{MMDFMessage} instance, the \mailheader{Status} +and \mailheader{X-Status} headers are omitted and the following conversions +take place: + +\begin{tableii}{l|l}{textrm} + {Resulting state}{\class{mboxMessage} or \class{MMDFMessage} state} +\lineii{"unseen" label}{no R flag} +\lineii{"deleted" label}{D flag} +\lineii{"answered" label}{A flag} +\end{tableii} + +When a \class{BabylMessage} instance is created based upon an \class{MHMessage} +instance, the following conversions take place: + +\begin{tableii}{l|l}{textrm} + {Resulting state}{\class{MHMessage} state} +\lineii{"unseen" label}{"unseen" sequence} +\lineii{"answered" label}{"replied" sequence} +\end{tableii} + +\subsubsection{\class{MMDFMessage}} +\label{mailbox-mmdfmessage} + +\begin{classdesc}{MMDFMessage}{\optional{message}} +A message with MMDF-specific behaviors. Parameter \var{message} has the same +meaning as with the \class{Message} constructor. +\end{classdesc} + +As with messages in an mbox mailbox, MMDF messages are stored with the sender's +address and the delivery date in an initial line beginning with "From~". +Likewise, flags that indicate the state of the message are typically stored in +\mailheader{Status} and \mailheader{X-Status} headers. 
+ +Conventional flags for MMDF messages are identical to those of mbox messages and +are as follows: + +\begin{tableiii}{l|l|l}{textrm}{Flag}{Meaning}{Explanation} +\lineiii{R}{Read}{Read} +\lineiii{O}{Old}{Previously detected by MUA} +\lineiii{D}{Deleted}{Marked for subsequent deletion} +\lineiii{F}{Flagged}{Marked as important} +\lineiii{A}{Answered}{Replied to} +\end{tableiii} + +The "R" and "O" flags are stored in the \mailheader{Status} header, and the +"D", "F", and "A" flags are stored in the \mailheader{X-Status} header. The +flags and headers typically appear in the order mentioned. + +\class{MMDFMessage} instances offer the following methods, which are identical +to those offered by \class{mboxMessage}: + +\begin{methoddesc}{get_from}{} +Return a string representing the "From~" line that marks the start of the +message in an mbox mailbox. The leading "From~" and the trailing newline are +excluded. +\end{methoddesc} + +\begin{methoddesc}{set_from}{from_\optional{, time_=None}} +Set the "From~" line to \var{from_}, which should be specified without a +leading "From~" or trailing newline. For convenience, \var{time_} may be +specified and will be formatted appropriately and appended to \var{from_}. If +\var{time_} is specified, it should be a \class{struct_time} instance, a tuple +suitable for passing to \method{time.strftime()}, or \code{True} (to use +\method{time.gmtime()}). +\end{methoddesc} + +\begin{methoddesc}{get_flags}{} +Return a string specifying the flags that are currently set. If the message +complies with the conventional format, the result is the concatenation in the +following order of zero or one occurrence of each of \character{R}, +\character{O}, \character{D}, \character{F}, and \character{A}. +\end{methoddesc} + +\begin{methoddesc}{set_flags}{flags} +Set the flags specified by \var{flags} and unset all others. 
Parameter +\var{flags} should be the concatenation in any order of zero or more +occurrences of each of \character{R}, \character{O}, \character{D}, +\character{F}, and \character{A}. +\end{methoddesc} + +\begin{methoddesc}{add_flag}{flag} +Set the flag(s) specified by \var{flag} without changing other flags. To add +more than one flag at a time, \var{flag} may be a string of more than one +character. +\end{methoddesc} + +\begin{methoddesc}{remove_flag}{flag} +Unset the flag(s) specified by \var{flag} without changing other flags. To +remove more than one flag at a time, \var{flag} may be a string of more than one +character. +\end{methoddesc} + +When an \class{MMDFMessage} instance is created based upon a +\class{MaildirMessage} instance, a "From~" line is generated based upon the +\class{MaildirMessage} instance's delivery date, and the following conversions +take place: + +\begin{tableii}{l|l}{textrm} + {Resulting state}{\class{MaildirMessage} state} +\lineii{R flag}{S flag} +\lineii{O flag}{"cur" subdirectory} +\lineii{D flag}{T flag} +\lineii{F flag}{F flag} +\lineii{A flag}{R flag} +\end{tableii} + +When an \class{MMDFMessage} instance is created based upon an \class{MHMessage} +instance, the following conversions take place: + +\begin{tableii}{l|l}{textrm} + {Resulting state}{\class{MHMessage} state} +\lineii{R flag and O flag}{no "unseen" sequence} +\lineii{O flag}{"unseen" sequence} +\lineii{F flag}{"flagged" sequence} +\lineii{A flag}{"replied" sequence} +\end{tableii} + +When an \class{MMDFMessage} instance is created based upon a +\class{BabylMessage} instance, the following conversions take place: + +\begin{tableii}{l|l}{textrm} + {Resulting state}{\class{BabylMessage} state} +\lineii{R flag and O flag}{no "unseen" label} +\lineii{O flag}{"unseen" label} +\lineii{D flag}{"deleted" label} +\lineii{A flag}{"answered" label} +\end{tableii} + +When an \class{MMDFMessage} instance is created based upon an +\class{mboxMessage} instance, the "From~" line is 
copied and all flags directly +correspond: + +\begin{tableii}{l|l}{textrm} + {Resulting state}{\class{mboxMessage} state} +\lineii{R flag}{R flag} +\lineii{O flag}{O flag} +\lineii{D flag}{D flag} +\lineii{F flag}{F flag} +\lineii{A flag}{A flag} +\end{tableii} + +\subsection{Exceptions} +\label{mailbox-exceptions} + +The following exception classes are defined in the \module{mailbox} module: + +\begin{classdesc}{Error}{} +The base class for all other module-specific exceptions. +\end{classdesc} + +\begin{classdesc}{NoSuchMailboxError}{} +Raised when a mailbox is expected but is not found, such as when instantiating +a \class{Mailbox} subclass with a path that does not exist (and with the +\var{create} parameter set to \code{False}), or when opening a folder that does +not exist. +\end{classdesc} + +\begin{classdesc}{NotEmptyError}{} +Raised when a mailbox is not empty but is expected to be, such as when deleting +a folder that contains messages. +\end{classdesc} + +\begin{classdesc}{ExternalClashError}{} +Raised when some mailbox-related condition beyond the control of the program +causes it to be unable to proceed, such as when failing to acquire a lock that +another program already holds, or when a uniquely-generated file name +already exists. +\end{classdesc} + +\begin{classdesc}{FormatError}{} +Raised when the data in a file cannot be parsed, such as when an \class{MH} +instance attempts to read a corrupted \file{.mh_sequences} file. +\end{classdesc} + +\subsection{Deprecated classes and methods} +\label{mailbox-deprecated} + +Older versions of the \module{mailbox} module do not support modification of +mailboxes, such as adding or removing messages, and do not provide classes to +represent format-specific message properties. For backward compatibility, the +older mailbox classes are still available, but the newer classes should be used +in preference to them. 
+ +Older mailbox objects support only iteration and provide a single public +method: + +\begin{methoddesc}{next}{} +Return the next message in the mailbox, created with the optional \var{factory} +argument passed into the mailbox object's constructor. By default this is an +\class{rfc822.Message} object (see the \refmodule{rfc822} module). Depending +on the mailbox implementation the \var{fp} attribute of this object may be a +true file object or a class instance simulating a file object, taking care of +things like message boundaries if multiple mail messages are contained in a +single file, etc. If no more messages are available, this method returns +\code{None}. +\end{methoddesc} + +Most of the older mailbox classes have names that differ from the current +mailbox class names, except for \class{Maildir}. For this reason, the new +\class{Maildir} class defines a \method{next()} method and its constructor +differs slightly from those of the other new mailbox classes. + +The older mailbox classes whose names are not the same as their newer +counterparts are as follows: \begin{classdesc}{UnixMailbox}{fp\optional{, factory}} Access to a classic \UNIX-style mailbox, where all messages are @@ -68,12 +1309,6 @@ The name of the mailbox directory is passed in \var{dirname}. \var{factory} is as with the \class{UnixMailbox} class. \end{classdesc} -\begin{classdesc}{Maildir}{dirname\optional{, factory}} -Access a Qmail mail directory. All new and current mail for the -mailbox specified by \var{dirname} is made available. -\var{factory} is as with the \class{UnixMailbox} class. -\end{classdesc} - \begin{classdesc}{BabylMailbox}{fp\optional{, factory}} Access a Babyl mailbox, which is similar to an MMDF mailbox. In Babyl format, each message has two sets of headers, the @@ -89,11 +1324,8 @@ messages start with the EOOH line and end with a line containing only \class{UnixMailbox} class. 
\end{classdesc} -Note that because the \refmodule{rfc822} module is deprecated, it is -recommended that you use the \refmodule{email} package to create -message objects from a mailbox. (The default can't be changed for -backwards compatibility reasons.) The safest way to do this is with -bit of code: +If you wish to use the older mailbox classes with the \module{email} module +rather than the deprecated \module{rfc822} module, you can do so as follows: \begin{verbatim} import email @@ -105,17 +1337,14 @@ def msgfactory(fp): return email.message_from_file(fp) except email.Errors.MessageParseError: # Don't return None since that will - # stop the mailbox iterator - return '' + # stop the mailbox iterator + return '' mbox = mailbox.UnixMailbox(fp, msgfactory) \end{verbatim} -The above wrapper is defensive against ill-formed MIME messages in the -mailbox, but you have to be prepared to receive the empty string from -the mailbox's \function{next()} method. On the other hand, if you -know your mailbox contains only well-formed MIME messages, you can -simplify this to: +Alternatively, if you know your mailbox contains only well-formed MIME +messages, you can simplify this to: \begin{verbatim} import email @@ -124,35 +1353,57 @@ import mailbox mbox = mailbox.UnixMailbox(fp, email.message_from_file) \end{verbatim} -\begin{seealso} - \seetitle[http://www.qmail.org/man/man5/mbox.html]{mbox - - file containing mail messages}{Description of the - traditional ``mbox'' mailbox format.} - \seetitle[http://www.qmail.org/man/man5/maildir.html]{maildir - - directory for incoming mail messages}{Description of the - ``maildir'' mailbox format.} - \seetitle[http://home.netscape.com/eng/mozilla/2.0/relnotes/demo/content-length.html]{Configuring - Netscape Mail on \UNIX: Why the Content-Length Format is - Bad}{A description of problems with relying on the - \mailheader{Content-Length} header for messages stored in - mailbox files.} -\end{seealso} +\subsection{Examples} 
+\label{mailbox-examples} +A simple example of printing the subjects of all messages in a mailbox that +seem interesting: -\subsection{Mailbox Objects \label{mailbox-objects}} +\begin{verbatim} +import mailbox +for message in mailbox.mbox('~/mbox'): + subject = message['subject'] # Could possibly be None. + if subject and 'python' in subject.lower(): + print subject +\end{verbatim} -All implementations of mailbox objects are iterable objects, and -have one externally visible method. This method is used by iterators -created from mailbox objects and may also be used directly. +A (surprisingly) simple example of copying all mail from a Babyl mailbox to an +MH mailbox, converting all of the format-specific information that can be +converted: -\begin{methoddesc}[mailbox]{next}{} -Return the next message in the mailbox, created with the optional -\var{factory} argument passed into the mailbox object's constructor. -By default this is an \class{rfc822.Message} -object (see the \refmodule{rfc822} module). Depending on the mailbox -implementation the \var{fp} attribute of this object may be a true -file object or a class instance simulating a file object, taking care -of things like message boundaries if multiple mail messages are -contained in a single file, etc. If no more messages are available, -this method returns \code{None}. 
-\end{methoddesc} +\begin{verbatim} +import mailbox +destination = mailbox.MH('~/Mail') +for message in mailbox.Babyl('~/RMAIL'): + destination.add(mailbox.MHMessage(message)) +\end{verbatim} + +An example of sorting mail from numerous mailing lists, being careful to avoid +mail corruption due to concurrent modification by other programs, mail loss due +to interruption of the program, or premature termination due to malformed +messages in the mailbox: + +\begin{verbatim} +import mailbox +import email.Errors +list_names = ('python-list', 'python-dev', 'python-bugs') +boxes = dict((name, mailbox.mbox('~/email/%s' % name)) for name in list_names) +inbox = mailbox.Maildir('~/Maildir', None) +for key in inbox.iterkeys(): + try: + message = inbox[key] + except email.Errors.MessageParseError: + continue # The message is malformed. Just leave it. + for name in list_names: + list_id = message['list-id'] + if list_id and name in list_id: + box = boxes[name] + box.lock() + box.add(message) + box.flush() # Write copy to disk before removing original. + box.unlock() + inbox.discard(key) + break # Found destination, so stop looking. +for box in boxes.itervalues(): + box.close() +\end{verbatim} diff --git a/Doc/lib/libmsilib.tex b/Doc/lib/libmsilib.tex new file mode 100644 index 0000000..1e044f4 --- /dev/null +++ b/Doc/lib/libmsilib.tex @@ -0,0 +1,485 @@ +\section{\module{msilib} --- + Read and write Microsoft Installer files} + +\declaremodule{standard}{msilib} + \platform{Windows} +\modulesynopsis{Creation of Microsoft Installer files, and CAB files.} +\moduleauthor{Martin v. L\"owis}{martin@v.loewis.de} +\sectionauthor{Martin v. L\"owis}{martin@v.loewis.de} + +\index{msi} + +\versionadded{2.5} + +The \module{msilib} supports the creation of Microsoft Installer +(\code{.msi}) files. Because these files often contain an embedded +``cabinet'' file (\code{.cab}), it also exposes an API to create +CAB files. 
Support for reading \code{.cab} files is currently not +implemented; read support for the \code{.msi} database is possible. + +This package aims to provide complete access to all tables in an +\code{.msi} file; therefore, it is a fairly low-level API. Two +primary applications of this package are the \module{distutils} +command \code{bdist_msi}, and the creation of the Python installer +package itself (although that currently uses a different version +of \code{msilib}). + +The package contents can be roughly split into four parts: +low-level CAB routines, low-level MSI routines, higher-level +MSI routines, and standard table structures. + +\begin{funcdesc}{FCICreate}{cabname, files} + Create a new CAB file named \var{cabname}. \var{files} must + be a list of tuples, each containing the name of the file on + disk, and the name of the file inside the CAB file. + + The files are added to the CAB file in the order they appear + in the list. All files are added into a single CAB file, + using the MSZIP compression algorithm. + + Callbacks to Python for the various steps of MSI creation + are currently not exposed. +\end{funcdesc} + +\begin{funcdesc}{UUIDCreate}{} + Return the string representation of a new unique identifier. + This wraps the Windows API functions \cfunction{UuidCreate} and + \cfunction{UuidToString}. +\end{funcdesc} + +\begin{funcdesc}{OpenDatabase}{path, persist} + Return a new database object by calling \cfunction{MsiOpenDatabase}. + \var{path} is the file name of the + MSI file; \var{persist} can be one of the constants + \code{MSIDBOPEN_CREATEDIRECT}, \code{MSIDBOPEN_CREATE}, + \code{MSIDBOPEN_DIRECT}, \code{MSIDBOPEN_READONLY}, or + \code{MSIDBOPEN_TRANSACT}, and may include the flag + \code{MSIDBOPEN_PATCHFILE}. See the Microsoft documentation for + the meaning of these flags; depending on the flags, + an existing database is opened, or a new one created. 
+\end{funcdesc} + +\begin{funcdesc}{CreateRecord}{count} + Return a new record object by calling \cfunction{MsiCreateRecord}. + \var{count} is the number of fields of the record. +\end{funcdesc} + +\begin{funcdesc}{init_database}{name, schema, ProductName, ProductCode, ProductVersion, Manufacturer} + Create and return a new database \var{name}, initialize it + with \var{schema}, and set the properties \var{ProductName}, + \var{ProductCode}, \var{ProductVersion}, and \var{Manufacturer}. + + \var{schema} must be a module object containing \code{tables} and + \code{_Validation_records} attributes; typically, + \module{msilib.schema} should be used. + + The database will contain just the schema and the validation + records when this function returns. +\end{funcdesc} + +\begin{funcdesc}{add_data}{database, table, records} + Add all \var{records} to the table named \var{table} in \var{database}. \var{records} should + be a list of tuples, each one containing all fields of a record + according to the schema of the table. For optional fields, + \code{None} can be passed. + + Field values can be int or long numbers, strings, or instances + of the \class{Binary} class. +\end{funcdesc} + +\begin{classdesc}{Binary}{filename} + Represents entries in the Binary table; inserting such + an object using \function{add_data} reads the file named + \var{filename} into the table. +\end{classdesc} + +\begin{funcdesc}{add_tables}{database, module} + Add all table content from \var{module} to \var{database}. + \var{module} must contain an attribute \var{tables} + listing all tables for which content should be added, + and one attribute per table that has the actual content. + + This is typically used to install the sequence tables. +\end{funcdesc} + +\begin{funcdesc}{add_stream}{database, name, path} + Add the file \var{path} into the \code{_Stream} table + of \var{database}, with the stream name \var{name}. +\end{funcdesc} + +\begin{funcdesc}{gen_uuid}{} + Return a new UUID, in the format that MSI typically + requires (i.e. 
in curly braces, and with all hexdigits + in upper-case). +\end{funcdesc} + +\begin{seealso} + \seetitle[http://msdn.microsoft.com/library/default.asp?url=/library/en-us/devnotes/winprog/fcicreate.asp]{FCICreateFile}{} + \seetitle[http://msdn.microsoft.com/library/default.asp?url=/library/en-us/rpc/rpc/uuidcreate.asp]{UuidCreate}{} + \seetitle[http://msdn.microsoft.com/library/default.asp?url=/library/en-us/rpc/rpc/uuidtostring.asp]{UuidToString}{} +\end{seealso} + +\subsection{Database Objects\label{database-objects}} + +\begin{methoddesc}{OpenView}{sql} + Return a view object, by calling \cfunction{MSIDatabaseOpenView}. + \var{sql} is the SQL statement to execute. +\end{methoddesc} + +\begin{methoddesc}{Commit}{} + Commit the changes pending in the current transaction, + by calling \cfunction{MSIDatabaseCommit}. +\end{methoddesc} + +\begin{methoddesc}{GetSummaryInformation}{count} + Return a new summary information object, by calling + \cfunction{MsiGetSummaryInformation}. \var{count} is the maximum number of + updated values. +\end{methoddesc} + +\begin{seealso} + \seetitle[http://msdn.microsoft.com/library/default.asp?url=/library/en-us/msi/setup/msiopenview.asp]{MSIOpenView}{} + \seetitle[http://msdn.microsoft.com/library/default.asp?url=/library/en-us/msi/setup/msidatabasecommit.asp]{MSIDatabaseCommit}{} + \seetitle[http://msdn.microsoft.com/library/default.asp?url=/library/en-us/msi/setup/msigetsummaryinformation.asp]{MSIGetSummaryInformation}{} +\end{seealso} + +\subsection{View Objects\label{view-objects}} + +\begin{methoddesc}{Execute}{\optional{params=None}} + Execute the SQL query of the view, through \cfunction{MSIViewExecute}. + \var{params} is an optional record describing actual values + of the parameter tokens in the query. +\end{methoddesc} + +\begin{methoddesc}{GetColumnInfo}{kind} + Return a record describing the columns of the view, through + calling \cfunction{MsiViewGetColumnInfo}. 
\var{kind} can be either + \code{MSICOLINFO_NAMES} or \code{MSICOLINFO_TYPES}. +\end{methoddesc} + +\begin{methoddesc}{Fetch}{} + Return a result record of the query, through calling + \cfunction{MsiViewFetch}. +\end{methoddesc} + +\begin{methoddesc}{Modify}{kind, data} + Modify the view, by calling \cfunction{MsiViewModify}. \var{kind} + can be one of \code{MSIMODIFY_SEEK}, \code{MSIMODIFY_REFRESH}, + \code{MSIMODIFY_INSERT}, \code{MSIMODIFY_UPDATE}, \code{MSIMODIFY_ASSIGN}, + \code{MSIMODIFY_REPLACE}, \code{MSIMODIFY_MERGE}, \code{MSIMODIFY_DELETE}, + \code{MSIMODIFY_INSERT_TEMPORARY}, \code{MSIMODIFY_VALIDATE}, + \code{MSIMODIFY_VALIDATE_NEW}, \code{MSIMODIFY_VALIDATE_FIELD}, or + \code{MSIMODIFY_VALIDATE_DELETE}. + + \var{data} must be a record describing the new data. +\end{methoddesc} + +\begin{methoddesc}{Close}{} + Close the view, through \cfunction{MsiViewClose}. +\end{methoddesc} + +\begin{seealso} + \seetitle[http://msdn.microsoft.com/library/default.asp?url=/library/en-us/msi/setup/msiviewexecute.asp]{MsiViewExecute}{} + \seetitle[http://msdn.microsoft.com/library/default.asp?url=/library/en-us/msi/setup/msiviewgetcolumninfo.asp]{MSIViewGetColumnInfo}{} + \seetitle[http://msdn.microsoft.com/library/default.asp?url=/library/en-us/msi/setup/msiviewfetch.asp]{MsiViewFetch}{} + \seetitle[http://msdn.microsoft.com/library/default.asp?url=/library/en-us/msi/setup/msiviewmodify.asp]{MsiViewModify}{} + \seetitle[http://msdn.microsoft.com/library/default.asp?url=/library/en-us/msi/setup/msiviewclose.asp]{MsiViewClose}{} +\end{seealso} + +\subsection{Summary Information Objects\label{summary-objects}} + +\begin{methoddesc}{GetProperty}{field} + Return a property of the summary, through \cfunction{MsiSummaryInfoGetProperty}. 
+ \var{field} is the name of the property, and can be one of the + constants + \code{PID_CODEPAGE}, \code{PID_TITLE}, \code{PID_SUBJECT}, + \code{PID_AUTHOR}, \code{PID_KEYWORDS}, \code{PID_COMMENTS}, + \code{PID_TEMPLATE}, \code{PID_LASTAUTHOR}, \code{PID_REVNUMBER}, + \code{PID_LASTPRINTED}, \code{PID_CREATE_DTM}, \code{PID_LASTSAVE_DTM}, + \code{PID_PAGECOUNT}, \code{PID_WORDCOUNT}, \code{PID_CHARCOUNT}, + \code{PID_APPNAME}, or \code{PID_SECURITY}. +\end{methoddesc} + +\begin{methoddesc}{GetPropertyCount}{} + Return the number of summary properties, through + \cfunction{MsiSummaryInfoGetPropertyCount}. +\end{methoddesc} + +\begin{methoddesc}{SetProperty}{field, value} + Set a property through \cfunction{MsiSummaryInfoSetProperty}. \var{field} + can have the same values as in \method{GetProperty}, \var{value} + is the new value of the property. Possible value types are integer + and string. +\end{methoddesc} + +\begin{methoddesc}{Persist}{} + Write the modified properties to the summary information stream, + using \cfunction{MsiSummaryInfoPersist}. +\end{methoddesc} + +\begin{seealso} + \seetitle[http://msdn.microsoft.com/library/default.asp?url=/library/en-us/msi/setup/msisummaryinfogetproperty.asp]{MsiSummaryInfoGetProperty}{} + \seetitle[http://msdn.microsoft.com/library/default.asp?url=/library/en-us/msi/setup/msisummaryinfogetpropertycount.asp]{MsiSummaryInfoGetPropertyCount}{} + \seetitle[http://msdn.microsoft.com/library/default.asp?url=/library/en-us/msi/setup/msisummaryinfosetproperty.asp]{MsiSummaryInfoSetProperty}{} + \seetitle[http://msdn.microsoft.com/library/default.asp?url=/library/en-us/msi/setup/msisummaryinfopersist.asp]{MsiSummaryInfoPersist}{} +\end{seealso} + +\subsection{Record Objects\label{record-objects}} + +\begin{methoddesc}{GetFieldCount}{} + Return the number of fields of the record, through \cfunction{MsiRecordGetFieldCount}. 
+\end{methoddesc} + +\begin{methoddesc}{SetString}{field, value} + Set \var{field} to \var{value} through \cfunction{MsiRecordSetString}. + \var{field} must be an integer; \var{value} a string. +\end{methoddesc} + +\begin{methoddesc}{SetStream}{field, value} + Set \var{field} to the contents of the file named \var{value}, + through \cfunction{MsiRecordSetStream}. + \var{field} must be an integer; \var{value} a string. +\end{methoddesc} + +\begin{methoddesc}{SetInteger}{field, value} + Set \var{field} to \var{value} through \cfunction{MsiRecordSetInteger}. + Both \var{field} and \var{value} must be an integer. +\end{methoddesc} + +\begin{methoddesc}{ClearData}{} + Set all fields of the record to 0, through \cfunction{MsiRecordClearData}. +\end{methoddesc} + +\begin{seealso} + \seetitle[http://msdn.microsoft.com/library/default.asp?url=/library/en-us/msi/setup/msirecordgetfieldcount.asp]{MsiRecordGetFieldCount}{} + \seetitle[http://msdn.microsoft.com/library/default.asp?url=/library/en-us/msi/setup/msirecordsetstring.asp]{MsiRecordSetString}{} + \seetitle[http://msdn.microsoft.com/library/default.asp?url=/library/en-us/msi/setup/msirecordsetstream.asp]{MsiRecordSetStream}{} + \seetitle[http://msdn.microsoft.com/library/default.asp?url=/library/en-us/msi/setup/msirecordsetinteger.asp]{MsiRecordSetInteger}{} + \seetitle[http://msdn.microsoft.com/library/default.asp?url=/library/en-us/msi/setup/msirecordclear.asp]{MsiRecordClear}{} +\end{seealso} + +\subsection{Errors\label{msi-errors}} + +All wrappers around MSI functions raise \exception{MsiError}; +the string inside the exception will contain more detail. + +\subsection{CAB Objects\label{cab}} + +\begin{classdesc}{CAB}{name} + The class \class{CAB} represents a CAB file. During MSI construction, + files will be added simultaneously to the \code{Files} table, and + to a CAB file. Then, when all files have been added, the CAB file + can be written, then added to the MSI file. 
+ + \var{name} is the name of the CAB file in the MSI file. +\end{classdesc} + +\begin{methoddesc}[CAB]{append}{full, logical} + Add the file with the pathname \var{full} to the CAB file, + under the name \var{logical}. If there is already a file + named \var{logical}, a new file name is created. + + Return the index of the file in the CAB file, and the + new name of the file inside the CAB file. +\end{methoddesc} + +\begin{methoddesc}[CAB]{commit}{database} + Generate a CAB file, add it as a stream to the MSI file, + put it into the \code{Media} table, and remove the generated + file from the disk. +\end{methoddesc} + +\subsection{Directory Objects\label{msi-directory}} + +\begin{classdesc}{Directory}{database, cab, basedir, physical, + logical, default, component\optional{, componentflags}} + Create a new directory in the Directory table. There is a current + component at each point in time for the directory, which is either + explicitly created through \method{start_component}, or implicitly when files + are added for the first time. Files are added into the current + component, and into the cab file. To create a directory, a base + directory object needs to be specified (can be \code{None}), the path to + the physical directory, and a logical directory name. \var{default} + specifies the DefaultDir slot in the directory table. \var{componentflags} + specifies the default flags that new components get. +\end{classdesc} + +\begin{methoddesc}[Directory]{start_component}{\optional{component\optional{, + feature\optional{, flags\optional{, keyfile\optional{, uuid}}}}}} + Add an entry to the Component table, and make this component the + current component for this directory. If no component name is given, the + directory name is used. If no \var{feature} is given, the current feature + is used. If no \var{flags} are given, the directory's default flags are + used. If no \var{keyfile} is given, the KeyPath is left null in the + Component table.
+\end{methoddesc} + +\begin{methoddesc}[Directory]{add_file}{file\optional{, src\optional{, + version\optional{, language}}}} + Add a file to the current component of the directory, starting a new + one if there is no current component. By default, the file name + in the source and the file table will be identical. If the \var{src} file + is specified, it is interpreted relative to the current + directory. Optionally, a \var{version} and a \var{language} can be specified for + the entry in the File table. +\end{methoddesc} + +\begin{methoddesc}[Directory]{glob}{pattern\optional{, exclude}} + Add a list of files to the current component as specified in the glob + pattern. Individual files can be excluded in the \var{exclude} list. +\end{methoddesc} + +\begin{methoddesc}[Directory]{remove_pyc}{} + Remove \code{.pyc}/\code{.pyo} files on uninstall. +\end{methoddesc} + +\begin{seealso} + \seetitle[http://msdn.microsoft.com/library/en-us/msi/setup/directory_table.asp]{Directory Table}{} + \seetitle[http://msdn.microsoft.com/library/en-us/msi/setup/file_table.asp]{File Table}{} + \seetitle[http://msdn.microsoft.com/library/en-us/msi/setup/component_table.asp]{Component Table}{} + \seetitle[http://msdn.microsoft.com/library/en-us/msi/setup/featurecomponents_table.asp]{FeatureComponents Table}{} +\end{seealso} + + +\subsection{Features\label{features}} + +\begin{classdesc}{Feature}{database, id, title, desc, display\optional{, + level=1\optional{, parent\optional{, directory\optional{, + attributes=0}}}}} + + Add a new record to the \code{Feature} table, using the values + \var{id}, \var{parent.id}, \var{title}, \var{desc}, \var{display}, + \var{level}, \var{directory}, and \var{attributes}. The resulting + feature object can be passed to the \method{start_component} method + of \class{Directory}. +\end{classdesc} + +\begin{methoddesc}[Feature]{set_current}{} + Make this feature the current feature of \module{msilib}.
+ New components are automatically added to the default feature, + unless a feature is explicitly specified. +\end{methoddesc} + +\begin{seealso} + \seetitle[http://msdn.microsoft.com/library/en-us/msi/setup/feature_table.asp]{Feature Table}{} +\end{seealso} + +\subsection{GUI classes\label{msi-gui}} + +\module{msilib} provides several classes that wrap the GUI tables in +an MSI database. However, no standard user interface is provided; use +\module{bdist_msi} to create MSI files with a user-interface for +installing Python packages. + +\begin{classdesc}{Control}{dlg, name} + Base class of the dialog controls. \var{dlg} is the dialog object + the control belongs to, and \var{name} is the control's name. +\end{classdesc} + +\begin{methoddesc}[Control]{event}{event, argument\optional{, + condition = ``1''\optional{, ordering}}} + + Make an entry into the \code{ControlEvent} table for this control. +\end{methoddesc} + +\begin{methoddesc}[Control]{mapping}{event, attribute} + Make an entry into the \code{EventMapping} table for this control. +\end{methoddesc} + +\begin{methoddesc}[Control]{condition}{action, condition} + Make an entry into the \code{ControlCondition} table for this control. +\end{methoddesc} + + +\begin{classdesc}{RadioButtonGroup}{dlg, name, property} + Create a radio button control named \var{name}. \var{property} + is the installer property that gets set when a radio button + is selected. +\end{classdesc} + +\begin{methoddesc}[RadioButtonGroup]{add}{name, x, y, width, height, text + \optional{, value}} + Add a radio button named \var{name} to the group, at the + coordinates \var{x}, \var{y}, \var{width}, \var{height}, and + with the label \var{text}. If \var{value} is omitted, it + defaults to \var{name}. +\end{methoddesc} + +\begin{classdesc}{Dialog}{db, name, x, y, w, h, attr, title, first, + default, cancel} + Return a new \class{Dialog} object. 
An entry in the \code{Dialog} table + is made, with the specified coordinates, dialog attributes, title, + name of the first, default, and cancel controls. +\end{classdesc} + +\begin{methoddesc}[Dialog]{control}{name, type, x, y, width, height, + attributes, property, text, control_next, help} + Return a new \class{Control} object. An entry in the \code{Control} table + is made with the specified parameters. + + This is a generic method; for specific types, specialized methods + are provided. +\end{methoddesc} + + +\begin{methoddesc}[Dialog]{text}{name, x, y, width, height, attributes, text} + Add and return a \code{Text} control. +\end{methoddesc} + +\begin{methoddesc}[Dialog]{bitmap}{name, x, y, width, height, text} + Add and return a \code{Bitmap} control. +\end{methoddesc} + +\begin{methoddesc}[Dialog]{line}{name, x, y, width, height} + Add and return a \code{Line} control. +\end{methoddesc} + +\begin{methoddesc}[Dialog]{pushbutton}{name, x, y, width, height, attributes, + text, next_control} + Add and return a \code{PushButton} control. +\end{methoddesc} + +\begin{methoddesc}[Dialog]{radiogroup}{name, x, y, width, height, + attributes, property, text, next_control} + Add and return a \code{RadioButtonGroup} control. +\end{methoddesc} + +\begin{methoddesc}[Dialog]{checkbox}{name, x, y, width, height, + attributes, property, text, next_control} + Add and return a \code{CheckBox} control. 
+\end{methoddesc} + +\begin{seealso} + \seetitle[http://msdn.microsoft.com/library/en-us/msi/setup/dialog_table.asp]{Dialog Table}{} + \seetitle[http://msdn.microsoft.com/library/en-us/msi/setup/control_table.asp]{Control Table}{} + \seetitle[http://msdn.microsoft.com/library/en-us/msi/setup/controls.asp]{Control Types}{} + \seetitle[http://msdn.microsoft.com/library/en-us/msi/setup/controlcondition_table.asp]{ControlCondition Table}{} + \seetitle[http://msdn.microsoft.com/library/en-us/msi/setup/controlevent_table.asp]{ControlEvent Table}{} + \seetitle[http://msdn.microsoft.com/library/en-us/msi/setup/eventmapping_table.asp]{EventMapping Table}{} + \seetitle[http://msdn.microsoft.com/library/en-us/msi/setup/radiobutton_table.asp]{RadioButton Table}{} +\end{seealso} + +\subsection{Precomputed tables\label{msi-tables}} + +\module{msilib} provides a few subpackages that contain +only schema and table definitions. Currently, these definitions +are based on MSI version 2.0. + +\begin{datadesc}{schema} + This is the standard MSI schema for MSI 2.0, with the + \var{tables} variable providing a list of table definitions, + and \var{_Validation_records} providing the data for + MSI validation. +\end{datadesc} + +\begin{datadesc}{sequence} + This module contains table contents for the standard sequence + tables: \var{AdminExecuteSequence}, \var{AdminUISequence}, + \var{AdvtExecuteSequence}, \var{InstallExecuteSequence}, and + \var{InstallUISequence}. +\end{datadesc} + +\begin{datadesc}{text} + This module contains definitions for the UIText and ActionText + tables, for the standard installer actions. +\end{datadesc} diff --git a/Doc/lib/liboperator.tex b/Doc/lib/liboperator.tex index 41da9b7..5ba3209 100644 --- a/Doc/lib/liboperator.tex +++ b/Doc/lib/liboperator.tex @@ -320,7 +320,7 @@ and \var{b} sequences. \begin{funcdesc}{irshift}{a, b} \funcline{__irshift__}{a, b} -\code{a = irshift(a, b)} is equivalent to \code{a >}\code{>= b}. 
+\code{a = irshift(a, b)} is equivalent to \code{a >>= b}. \versionadded{2.5} \end{funcdesc} @@ -499,7 +499,7 @@ symbols in the Python syntax and the functions in the {\code{neg(\var{a})}} \lineiii{Negation (Logical)}{\code{not \var{a}}} {\code{not_(\var{a})}} - \lineiii{Right Shift}{\code{\var{a} >\code{>} \var{b}}} + \lineiii{Right Shift}{\code{\var{a} >> \var{b}}} {\code{rshift(\var{a}, \var{b})}} \lineiii{Sequence Repitition}{\code{\var{seq} * \var{i}}} {\code{repeat(\var{seq}, \var{i})}} diff --git a/Doc/lib/liboptparse.tex b/Doc/lib/liboptparse.tex index 8aca501..ec43e3d 100644 --- a/Doc/lib/liboptparse.tex +++ b/Doc/lib/liboptparse.tex @@ -35,9 +35,9 @@ With these few lines of code, users of your script can now do the \end{verbatim} As it parses the command line, \code{optparse} sets attributes of the -\var{options} object returned by \method{parse{\_}args()} based on user-supplied +\code{options} object returned by \method{parse{\_}args()} based on user-supplied command-line values. When \method{parse{\_}args()} returns from parsing this -command line, \var{options.filename} will be \code{"outfile"} and +command line, \code{options.filename} will be \code{"outfile"} and \code{options.verbose} will be \code{False}. \code{optparse} supports both long and short options, allows short options to be merged together, and allows options to be associated with their arguments in a variety of @@ -100,8 +100,8 @@ options; the traditional \UNIX{} syntax is a hyphen (``-'') followed by a single letter, e.g. \code{"-x"} or \code{"-F"}. Also, traditional \UNIX{} syntax allows multiple options to be merged into a single argument, e.g. \code{"-x -F"} is equivalent to \code{"-xF"}. The GNU project -introduced \code{"{--}"} followed by a series of hyphen-separated words, -e.g. \code{"{--}file"} or \code{"{--}dry-run"}. These are the only two option +introduced \code{"-{}-"} followed by a series of hyphen-separated words, +e.g. \code{"-{}-file"} or \code{"-{}-dry-run"}. 
These are the only two option syntaxes provided by \module{optparse}. Some other option syntaxes that the world has seen include: @@ -170,7 +170,7 @@ For example, consider this hypothetical command-line: prog -v --report /tmp/report.txt foo bar \end{verbatim} -\code{"-v"} and \code{"{--}report"} are both options. Assuming that +\code{"-v"} and \code{"-{}-report"} are both options. Assuming that \longprogramopt{report} takes one argument, \code{"/tmp/report.txt"} is an option argument. \code{"foo"} and \code{"bar"} are positional arguments. @@ -287,12 +287,12 @@ but that's rarely necessary: by default it uses \code{sys.argv{[}1:]}.) \method{parse{\_}args()} returns two values: \begin{itemize} \item {} -\var{options}, an object containing values for all of your options{---}e.g. if \code{"-{}-file"} takes a single string argument, then -\var{options.file} will be the filename supplied by the user, or +\code{options}, an object containing values for all of your options{---}e.g. if \code{"-{}-file"} takes a single string argument, then +\code{options.file} will be the filename supplied by the user, or \code{None} if the user did not supply that option \item {} -\var{args}, the list of positional arguments leftover after parsing +\code{args}, the list of positional arguments leftover after parsing options \end{itemize} @@ -309,7 +309,7 @@ command line. There is a fixed set of actions hard-coded into \module{optparse} adding new actions is an advanced topic covered in section~\ref{optparse-extending}, Extending \module{optparse}. Most actions tell \module{optparse} to store a value in some variable{---}for example, take a string from the command line and store it in an -attribute of \var{options}. +attribute of \code{options}. If you don't specify an option action, \module{optparse} defaults to \code{store}. 
@@ -333,8 +333,8 @@ args = ["-f", "foo.txt"] \end{verbatim} When \module{optparse} sees the option string \code{"-f"}, it consumes the next -argument, \code{"foo.txt"}, and stores it in \var{options.filename}. So, -after this call to \method{parse{\_}args()}, \var{options.filename} is +argument, \code{"foo.txt"}, and stores it in \code{options.filename}. So, +after this call to \method{parse{\_}args()}, \code{options.filename} is \code{"foo.txt"}. Some other option types supported by \module{optparse} are \code{int} and \code{float}. @@ -379,7 +379,7 @@ types is covered in section~\ref{optparse-extending}, Extending \module{optparse Flag options{---}set a variable to true or false when a particular option is seen{---}are quite common. \module{optparse} supports them with two separate actions, \code{store{\_}true} and \code{store{\_}false}. For example, you might have a -\var{verbose} flag that is turned on with \code{"-v"} and off with \code{"-q"}: +\code{verbose} flag that is turned on with \code{"-v"} and off with \code{"-q"}: \begin{verbatim} parser.add_option("-v", action="store_true", dest="verbose") parser.add_option("-q", action="store_false", dest="verbose") @@ -421,7 +421,7 @@ want more control. \module{optparse} lets you supply a default value for each destination, which is assigned before the command line is parsed. First, consider the verbose/quiet example. 
If we want \module{optparse} to set -\var{verbose} to \code{True} unless \code{"-q"} is seen, then we can do this: +\code{verbose} to \code{True} unless \code{"-q"} is seen, then we can do this: \begin{verbatim} parser.add_option("-v", action="store_true", dest="verbose", default=True) parser.add_option("-q", action="store_false", dest="verbose") @@ -441,7 +441,7 @@ parser.add_option("-v", action="store_true", dest="verbose", default=False) parser.add_option("-q", action="store_false", dest="verbose", default=True) \end{verbatim} -Again, the default value for \var{verbose} will be \code{True}: the last +Again, the default value for \code{verbose} will be \code{True}: the last default value supplied for any particular destination is the one that counts. @@ -566,7 +566,7 @@ argument to OptionParser: parser = OptionParser(usage="%prog [-f] [-q]", version="%prog 1.0") \end{verbatim} -Note that \code{"{\%}prog"} is expanded just like it is in \var{usage}. Apart +Note that \code{"{\%}prog"} is expanded just like it is in \code{usage}. Apart from that, \code{version} can contain anything you like. When you supply it, \module{optparse} automatically adds a \code{"-{}-version"} option to your parser. If it encounters this option on the command line, it expands your @@ -580,14 +580,14 @@ foo 1.0 \end{verbatim} -\subsubsection{How \module{optparse} handles errors\label{optparse-how-optik-handles-errors}} +\subsubsection{How \module{optparse} handles errors\label{optparse-how-optparse-handles-errors}} There are two broad classes of errors that \module{optparse} has to worry about: programmer errors and user errors. Programmer errors are usually -erroneous calls to \code{parse.add{\_}option()}, e.g. invalid option strings, +erroneous calls to \code{parser.add{\_}option()}, e.g. invalid option strings, unknown option attributes, missing option attributes, etc. 
These are dealt with in the usual way: raise an exception (either -\exception{optparse.OptionError} or \exception{TypeError}) and let the program crash. +\code{optparse.OptionError} or \code{TypeError}) and let the program crash. Handling user errors is much more important, since they are guaranteed to happen no matter how stable your code is. \module{optparse} can automatically @@ -659,12 +659,66 @@ def main(): if __name__ == "__main__": main() \end{verbatim} -% $Id: tutorial.txt 415 2004-09-30 02:26:17Z greg $ +% $Id: tutorial.txt 505 2005-07-22 01:52:40Z gward $ \subsection{Reference Guide\label{optparse-reference-guide}} +\subsubsection{Creating the parser\label{optparse-creating-parser}} + +The first step in using \module{optparse} is to create an OptionParser instance: +\begin{verbatim} +parser = OptionParser(...) +\end{verbatim} + +The OptionParser constructor has no required arguments, but a number of +optional keyword arguments. You should always pass them as keyword +arguments, i.e. do not rely on the order in which the arguments are +declared. +\begin{quote} +\begin{description} +\item[\code{usage} (default: \code{"{\%}prog {[}options]"})] +The usage summary to print when your program is run incorrectly or +with a help option. When \module{optparse} prints the usage string, it expands +\code{{\%}prog} to \code{os.path.basename(sys.argv{[}0])} (or to \code{prog} if +you passed that keyword argument). To suppress a usage message, +pass the special value \code{optparse.SUPPRESS{\_}USAGE}. +\item[\code{option{\_}list} (default: \code{{[}]})] +A list of Option objects to populate the parser with. The options +in \code{option{\_}list} are added after any options in +\code{standard{\_}option{\_}list} (a class attribute that may be set by +OptionParser subclasses), but before any version or help options. +Deprecated; use \method{add{\_}option()} after creating the parser instead. 
+\item[\code{option{\_}class} (default: optparse.Option)] +Class to use when adding options to the parser in \method{add{\_}option()}. +\item[\code{version} (default: \code{None})] +A version string to print when the user supplies a version option. +If you supply a true value for \code{version}, \module{optparse} automatically adds +a version option with the single option string \code{"-{}-version"}. The +substring \code{"{\%}prog"} is expanded the same as for \code{usage}. +\item[\code{conflict{\_}handler} (default: \code{"error"})] +Specifies what to do when options with conflicting option strings +are added to the parser; see section~\ref{optparse-conflicts-between-options}, Conflicts between options. +\item[\code{description} (default: \code{None})] +A paragraph of text giving a brief overview of your program. \module{optparse} +reformats this paragraph to fit the current terminal width and +prints it when the user requests help (after \code{usage}, but before +the list of options). +\item[\code{formatter} (default: a new IndentedHelpFormatter)] +An instance of optparse.HelpFormatter that will be used for +printing help text. \module{optparse} provides two concrete classes for this +purpose: IndentedHelpFormatter and TitledHelpFormatter. +\item[\code{add{\_}help{\_}option} (default: \code{True})] +If true, \module{optparse} will add a help option (with option strings \code{"-h"} +and \code{"-{}-help"}) to the parser. +\item[\code{prog}] +The string to use when expanding \code{"{\%}prog"} in \code{usage} and +\code{version} instead of \code{os.path.basename(sys.argv{[}0])}. +\end{description} +\end{quote} + + \subsubsection{Populating the parser\label{optparse-populating-parser}} There are several ways to populate the parser with options. The @@ -708,38 +762,34 @@ strings, e.g. \programopt{-f} and \longprogramopt{file}. You can specify any number of short or long option strings, but you must specify at least one overall option string. 
-The canonical way to create an Option instance is by calling -\function{make{\_}option()}, so that is what will be shown here. However, the -most common and convenient way is to use \code{parser.add{\_}option()}. Note -that \function{make{\_}option()} and \code{parser.add{\_}option()} have identical call -signatures: +The canonical way to create an Option instance is with the +\method{add{\_}option()} method of \class{OptionParser}: \begin{verbatim} -make_option(opt_str, ..., attr=value, ...) -parser.add_option(opt_str, ..., attr=value, ...) +parser.add_option(opt_str[, ...], attr=value, ...) \end{verbatim} To define an option with only a short option string: \begin{verbatim} -make_option("-f", attr=value, ...) +parser.add_option("-f", attr=value, ...) \end{verbatim} And to define an option with only a long option string: \begin{verbatim} -make_option("--foo", attr=value, ...) +parser.add_option("--foo", attr=value, ...) \end{verbatim} -The \code{attr=value} keyword arguments define option attributes, -i.e. attributes of the Option object. The most important option -attribute is \member{action}, and it largely determines what other attributes -are relevant or required. If you pass irrelevant option attributes, or -fail to pass required ones, \module{optparse} raises an OptionError exception -explaining your mistake. +The keyword arguments define attributes of the new Option object. The +most important option attribute is \member{action}, and it largely determines +which other attributes are relevant or required. If you pass irrelevant +option attributes, or fail to pass required ones, \module{optparse} raises an +OptionError exception explaining your mistake. -An options's \emph{action} determines what \module{optparse} does when it encounters -this option on the command-line. The actions hard-coded into \module{optparse} are: +An option's \emph{action} determines what \module{optparse} does when it encounters this +option on the command-line.
The standard option actions hard-coded into +\module{optparse} are: \begin{description} \item[\code{store}] -store this option's argument {[}default] +store this option's argument (default) \item[\code{store{\_}const}] store a constant value \item[\code{store{\_}true}] @@ -748,6 +798,8 @@ store a true value store a false value \item[\code{append}] append this option's argument to a list +\item[\code{append{\_}const}] +append a constant value to a list \item[\code{count}] increment a counter by one \item[\code{callback}] @@ -762,24 +814,25 @@ action, you may also supply \member{type} and \member{dest} option attributes; s below.) As you can see, most actions involve storing or updating a value -somewhere. \module{optparse} always creates an instance of \code{optparse.Values} -specifically for this purpose; we refer to this instance as \var{options}. -Option arguments (and various other values) are stored as attributes of -this object, according to the \member{dest} (destination) option attribute. +somewhere. \module{optparse} always creates a special object for this, +conventionally called \code{options} (it happens to be an instance of +\code{optparse.Values}). Option arguments (and various other values) are +stored as attributes of this object, according to the \member{dest} +(destination) option attribute. 
For example, when you call \begin{verbatim} parser.parse_args() \end{verbatim} -one of the first things \module{optparse} does is create the \var{options} object: +one of the first things \module{optparse} does is create the \code{options} object: \begin{verbatim} options = Values() \end{verbatim} If one of the options in this parser is defined with \begin{verbatim} -make_option("-f", "--file", action="store", type="string", dest="filename") +parser.add_option("-f", "--file", action="store", type="string", dest="filename") \end{verbatim} and the command-line being parsed includes any of the following: @@ -790,8 +843,7 @@ and the command-line being parsed includes any of the following: --file foo \end{verbatim} -then \module{optparse}, on seeing the \programopt{-f} or \longprogramopt{file} option, will do the -equivalent of +then \module{optparse}, on seeing this option, will do the equivalent of \begin{verbatim} options.filename = "foo" \end{verbatim} @@ -912,6 +964,13 @@ options.tracks.append(int("4")) \end{verbatim} \item {} +\code{append{\_}const} {[}required: \code{const}; relevant: \member{dest}] + +Like \code{store{\_}const}, but the value \code{const} is appended to \member{dest}; +as with \code{append}, \member{dest} defaults to \code{None}, and an empty list is +automatically created the first time the option is encountered. + +\item {} \code{count} {[}relevant: \member{dest}] Increment the integer stored at \member{dest}. If no default value is @@ -939,14 +998,9 @@ options.verbosity += 1 \code{callback} {[}required: \code{callback}; relevant: \member{type}, \code{nargs}, \code{callback{\_}args}, \code{callback{\_}kwargs}] -Call the function specified by \code{callback}.
The signature of -this function should be +Call the function specified by \code{callback}, which is called as \begin{verbatim} -func(option : Option, - opt : string, - value : any, - parser : OptionParser, - *args, **kwargs) +func(option, opt_str, value, parser, *args, **kwargs) \end{verbatim} See section~\ref{optparse-option-callbacks}, Option Callbacks for more detail. @@ -956,7 +1010,7 @@ See section~\ref{optparse-option-callbacks}, Option Callbacks for more detail. Prints a complete help message for all the options in the current option parser. The help message is constructed from -the \var{usage} string passed to OptionParser's constructor and +the \code{usage} string passed to OptionParser's constructor and the \member{help} string passed to every option. If no \member{help} string is supplied for an option, it will still be @@ -1007,6 +1061,87 @@ constructor. As with \member{help} options, you will rarely create \end{itemize} +\subsubsection{Option attributes\label{optparse-option-attributes}} + +The following option attributes may be passed as keyword arguments +to \code{parser.add{\_}option()}. If you pass an option attribute +that is not relevant to a particular option, or fail to pass a required +option attribute, \module{optparse} raises OptionError. +\begin{itemize} +\item {} +\member{action} (default: \code{"store"}) + +Determines \module{optparse}'s behaviour when this option is seen on the command +line; the available options are documented above. + +\item {} +\member{type} (default: \code{"string"}) + +The argument type expected by this option (e.g., \code{"string"} or +\code{"int"}); the available option types are documented below. + +\item {} +\member{dest} (default: derived from option strings) + +If the option's action implies writing or modifying a value somewhere, +this tells \module{optparse} where to write it: \member{dest} names an attribute of the +\code{options} object that \module{optparse} builds as it parses the command line. 
+ +\item {} +\code{default} (deprecated) + +The value to use for this option's destination if the option is not +seen on the command line. Deprecated; use \code{parser.set{\_}defaults()} +instead. + +\item {} +\code{nargs} (default: 1) + +How many arguments of type \member{type} should be consumed when this +option is seen. If {\textgreater} 1, \module{optparse} will store a tuple of values to +\member{dest}. + +\item {} +\code{const} + +For actions that store a constant value, the constant value to store. + +\item {} +\code{choices} + +For options of type \code{"choice"}, the list of strings the user +may choose from. + +\item {} +\code{callback} + +For options with action \code{"callback"}, the callable to call when this +option is seen. See section~\ref{optparse-option-callbacks}, Option Callbacks for detail on the arguments +passed to \code{callback}. + +\item {} +\code{callback{\_}args}, \code{callback{\_}kwargs} + +Additional positional and keyword arguments to pass to \code{callback} +after the four standard callback arguments. + +\item {} +\member{help} + +Help text to print for this option when listing all available options +after the user supplies a \member{help} option (such as \code{"-{}-help"}). +If no help text is supplied, the option will be listed without help +text. To hide this option, use the special value \code{SUPPRESS{\_}HELP}. + +\item {} +\code{metavar} (default: derived from option strings) + +Stand-in for the option argument(s) to use when printing help text. +See section~\ref{optparse-tutorial}, the tutorial for an example. + +\end{itemize} + + \subsubsection{Standard option types\label{optparse-standard-option-types}} \module{optparse} has six built-in option types: \code{string}, \code{int}, \code{long}, @@ -1017,22 +1152,74 @@ Arguments to string options are not checked or converted in any way: the text on the command line is stored in the destination (or passed to the callback) as-is.
-Integer arguments are passed to \code{int()} to convert them to Python -integers. If \code{int()} fails, so will \module{optparse}, although with a more -useful error message. (Internally, \module{optparse} raises -\exception{OptionValueError}; OptionParser catches this exception higher -up and terminates your program with a useful error message.) +Integer arguments (type \code{int} or \code{long}) are parsed as follows: +\begin{quote} +\begin{itemize} +\item {} +if the number starts with \code{0x}, it is parsed as a hexadecimal number + +\item {} +if the number starts with \code{0}, it is parsed as an octal number + +\item {} +if the number starts with \code{0b}, it is parsed as a binary number + +\item {} +otherwise, the number is parsed as a decimal number + +\end{itemize} +\end{quote} + +The conversion is done by calling either \code{int()} or \code{long()} with +the appropriate base (2, 8, 10, or 16). If this fails, so will \module{optparse}, +although with a more useful error message. -Likewise, \code{float} arguments are passed to \code{float()} for conversion, -\code{long} arguments to \code{long()}, and \code{complex} arguments to -\code{complex()}. Apart from that, they are handled identically to integer -arguments. +\code{float} and \code{complex} option arguments are converted directly with +\code{float()} and \code{complex()}, with similar error-handling. \code{choice} options are a subtype of \code{string} options. The \code{choices} option attribute (a sequence of strings) defines the set of allowed -option arguments. \code{optparse.option.check{\_}choice()} compares +option arguments. \code{optparse.check{\_}choice()} compares user-supplied option arguments against this master list and raises -\exception{OptionValueError} if an invalid string is given. +OptionValueError if an invalid string is given.
+ + +\subsubsection{Parsing arguments\label{optparse-parsing-arguments}} + +The whole point of creating and populating an OptionParser is to call +its \method{parse{\_}args()} method: +\begin{verbatim} +(options, args) = parser.parse_args(args=None, values=None) +\end{verbatim} + +where the input parameters are +\begin{description} +\item[\code{args}] +the list of arguments to process (\code{sys.argv{[}1:]} by default) +\item[\code{values}] +object to store option arguments in (a new instance of +optparse.Values by default) +\end{description} + +and the return values are +\begin{description} +\item[\code{options}] +the same object as was passed in as \code{values}, or the new +optparse.Values instance created by \module{optparse} +\item[\code{args}] +the leftover positional arguments after all options have been +processed +\end{description} + +The most common usage is to supply neither keyword argument. If you +supply a \code{values} object, it will be repeatedly modified with a +\code{setattr()} call for every option argument written to an option +destination, and finally returned by \method{parse{\_}args()}. + +If \method{parse{\_}args()} encounters any errors in the argument list, it calls +the OptionParser's \method{error()} method with an appropriate end-user error +message. This ultimately terminates your process with an exit status of +2 (the traditional \UNIX{} exit status for command-line errors). \subsubsection{Querying and manipulating your option parser\label{optparse-querying-manipulating-option-parser}} @@ -1050,9 +1237,8 @@ Returns the Option instance with the option string \code{opt{\_}str}, or If the OptionParser has an option corresponding to \code{opt{\_}str}, that option is removed. If that option provided any other option strings, all of those option strings become invalid. - If \code{opt{\_}str} does not occur in any option belonging to this -OptionParser, raises \exception{ValueError}. +OptionParser, raises ValueError.
\end{description} @@ -1074,20 +1260,20 @@ options. If it finds any, it invokes the current conflict-handling mechanism. You can set the conflict-handling mechanism either in the constructor: \begin{verbatim} -parser = OptionParser(..., conflict_handler="...") +parser = OptionParser(..., conflict_handler=handler) \end{verbatim} or with a separate call: \begin{verbatim} -parser.set_conflict_handler("...") +parser.set_conflict_handler(handler) \end{verbatim} -The available conflict-handling mechanisms are: +The available conflict handlers are: \begin{quote} \begin{description} \item[\code{error} (default)] assume option conflicts are a programming error and raise -\exception{OptionConflictError} +OptionConflictError \item[\code{resolve}] resolve option conflicts intelligently (see below) \end{description} @@ -1131,7 +1317,78 @@ options: -n, --noisy be noisy --dry-run new dry-run option \end{verbatim} -% $Id: reference.txt 415 2004-09-30 02:26:17Z greg $ + + +\subsubsection{Cleanup\label{optparse-cleanup}} + +OptionParser instances have several cyclic references. This should not +be a problem for Python's garbage collector, but you may wish to break +the cyclic references explicitly by calling \code{destroy()} on your +OptionParser once you are done with it. This is particularly useful in +long-running applications where large object graphs are reachable from +your OptionParser. + + +\subsubsection{Other methods\label{optparse-other-methods}} + +OptionParser supports several other public methods: +\begin{itemize} +\item {} +\code{set{\_}usage(usage)} + +Set the usage string according to the rules described above for the +\code{usage} constructor keyword argument. Passing \code{None} sets the +default usage string; use \code{SUPPRESS{\_}USAGE} to suppress a usage +message. 
+ +\item {} +\code{enable{\_}interspersed{\_}args()}, \code{disable{\_}interspersed{\_}args()} + +Enable/disable positional arguments interspersed with options, similar +to GNU getopt (enabled by default). For example, if \code{"-a"} and +\code{"-b"} are both simple options that take no arguments, \module{optparse} +normally accepts this syntax: +\begin{verbatim} +prog -a arg1 -b arg2 +\end{verbatim} + +and treats it as equivalent to +\begin{verbatim} +prog -a -b arg1 arg2 +\end{verbatim} + +To disable this feature, call \code{disable{\_}interspersed{\_}args()}. This +restores traditional \UNIX{} syntax, where option parsing stops with the +first non-option argument. + +\item {} +\code{set{\_}defaults(dest=value, ...)} + +Set default values for several option destinations at once. Using +\method{set{\_}defaults()} is the preferred way to set default values for +options, since multiple options can share the same destination. For +example, if several ``mode'' options all set the same destination, any +one of them can set the default, and the last one wins: +\begin{verbatim} +parser.add_option("--advanced", action="store_const", + dest="mode", const="advanced", + default="novice") # overridden below +parser.add_option("--novice", action="store_const", + dest="mode", const="novice", + default="advanced") # overrides above setting +\end{verbatim} + +To avoid this confusion, use \method{set{\_}defaults()}: +\begin{verbatim} +parser.set_defaults(mode="advanced") +parser.add_option("--advanced", action="store_const", + dest="mode", const="advanced") +parser.add_option("--novice", action="store_const", + dest="mode", const="novice") +\end{verbatim} + +\end{itemize} +% $Id: reference.txt 505 2005-07-22 01:52:40Z gward $ \subsection{Option Callbacks\label{optparse-option-callbacks}} @@ -1234,7 +1491,7 @@ its instance attributes: the current list of leftover arguments, ie. arguments that have been consumed but are neither options nor option arguments. 
Feel free to modify \code{parser.largs}, e.g. by adding more -arguments to it. (This list will become \var{args}, the second +arguments to it. (This list will become \code{args}, the second return value of \method{parse{\_}args()}.) \item[\code{parser.rargs}] the current list of remaining arguments, ie. with \code{opt{\_}str} and @@ -1260,7 +1517,7 @@ is a dictionary of arbitrary keyword arguments supplied via \subsubsection{Raising errors in a callback\label{optparse-raising-errors-in-callback}} -The callback function should raise \exception{OptionValueError} if there are any +The callback function should raise OptionValueError if there are any problems with the option or its argument(s). \module{optparse} catches this and terminates the program, printing the error message you supply to stderr. Your message should be clear, concise, accurate, and mention diff --git a/Doc/lib/libpdb.tex b/Doc/lib/libpdb.tex index a5b36a6..b252aeb 100644 --- a/Doc/lib/libpdb.tex +++ b/Doc/lib/libpdb.tex @@ -178,12 +178,12 @@ most commands. \item[d(own)] Move the current frame one level down in the stack trace -(to an newer frame). +(to a newer frame). \item[u(p)] Move the current frame one level up in the stack trace -(to a older frame). +(to an older frame). \item[b(reak) \optional{\optional{\var{filename}:}\var{lineno}\code{\Large{|}}\var{function}\optional{, \var{condition}}}] diff --git a/Doc/lib/libposixpath.tex b/Doc/lib/libposixpath.tex index cea963e..9f0de1f 100644 --- a/Doc/lib/libposixpath.tex +++ b/Doc/lib/libposixpath.tex @@ -146,8 +146,9 @@ should detect mount points for all \UNIX{} and \POSIX{} variants. \end{funcdesc} \begin{funcdesc}{join}{path1\optional{, path2\optional{, ...}}} -Joins one or more path components intelligently. If any component is -an absolute path, all previous components are thrown away, and joining +Join one or more path components intelligently. 
If any component is +an absolute path, all previous components (on Windows, including the +previous drive letter, if there was one) are thrown away, and joining continues. The return value is the concatenation of \var{path1}, and optionally \var{path2}, etc., with exactly one directory separator (\code{os.sep}) inserted between components, unless \var{path2} is diff --git a/Doc/lib/libprofile.tex b/Doc/lib/libprofile.tex index 9ff5ba0..0108b21 100644 --- a/Doc/lib/libprofile.tex +++ b/Doc/lib/libprofile.tex @@ -384,14 +384,15 @@ arguments to supply the globals and locals dictionaries for the \var{command} string. \end{funcdesc} -Analysis of the profiler data is done using this class from the -\module{pstats} module: +Analysis of the profiler data is done using the \class{Stats} class. + +\note{The \class{Stats} class is defined in the \module{pstats} module.} % now switch modules.... % (This \stmodindex use may be hard to change ;-( ) \stmodindex{pstats} -\begin{classdesc}{Stats}{filename\optional{, \moreargs\optional{, stream=sys.stdout}}} +\begin{classdesc}{Stats}{filename\optional{, stream=sys.stdout\optional{, \moreargs}}} This class constructor creates an instance of a ``statistics object'' from a \var{filename} (or set of filenames). \class{Stats} objects are manipulated by methods, in order to print useful reports. You may specify @@ -409,6 +410,8 @@ functions will be coalesced, so that an overall view of several processes can be considered in a single report. If additional files need to be combined with data in an existing \class{Stats} object, the \method{add()} method can be used. 
+ +\versionchanged[The \var{stream} parameter was added]{2.5} \end{classdesc} diff --git a/Doc/lib/librlcompleter.tex b/Doc/lib/librlcompleter.tex index b2a1eba7..cb2ac59 100644 --- a/Doc/lib/librlcompleter.tex +++ b/Doc/lib/librlcompleter.tex @@ -2,18 +2,17 @@ Completion function for GNU readline} \declaremodule{standard}{rlcompleter} - \platform{Unix} \sectionauthor{Moshe Zadka}{moshez@zadka.site.co.il} -\modulesynopsis{Python identifier completion for the GNU readline library.} +\modulesynopsis{Python identifier completion, suitable for the GNU readline library.} -The \module{rlcompleter} module defines a completion function for +The \module{rlcompleter} module defines a completion function suitable for the \refmodule{readline} module by completing valid Python identifiers and keywords. -This module is \UNIX-specific due to its dependence on the -\refmodule{readline} module. - -The \module{rlcompleter} module defines the \class{Completer} class. +When this module is imported on a \UNIX\ platform with the \module{readline} +module available, an instance of the \class{Completer} class is automatically +created and its \method{complete} method is set as the \module{readline} +completer. Example: @@ -44,6 +43,9 @@ else: \end{verbatim} +On platforms without \module{readline}, the \class{Completer} class defined +by this module can still be used for custom purposes. + \subsection{Completer Objects \label{completer-objects}} Completer objects have the following method: diff --git a/Doc/lib/librunpy.tex b/Doc/lib/librunpy.tex index 4be9901..c7a7e51 100644 --- a/Doc/lib/librunpy.tex +++ b/Doc/lib/librunpy.tex @@ -10,7 +10,7 @@ \versionadded{2.5} The \module{runpy} module is used to locate and run Python modules -without importing them first. It's main use is to implement the +without importing them first. Its main use is to implement the \programopt{-m} command line switch that allows scripts to be located using the Python module namespace rather than the filesystem. 
diff --git a/Doc/lib/libsqlite3.tex b/Doc/lib/libsqlite3.tex new file mode 100644 index 0000000..8c80eb6 --- /dev/null +++ b/Doc/lib/libsqlite3.tex @@ -0,0 +1,503 @@ +\section{\module{sqlite3} --- + DB-API 2.0 interface for SQLite databases} + +\declaremodule{builtin}{sqlite3} +\modulesynopsis{A DB-API 2.0 implementation using SQLite 3.x.} +\sectionauthor{Gerhard Häring}{gh@ghaering.de} +\versionadded{2.5} + +\subsection{Module functions and constants\label{sqlite3-Module-Contents}} + +\begin{datadesc}{PARSE_DECLTYPES} +This constant is meant to be used with the \var{detect_types} parameter of the +\function{connect} function. + +Setting it makes the \module{sqlite3} module parse the declared type for each column it +returns. It will parse out the first word of the declared type, i. e. for +"integer primary key", it will parse out "integer". Then for that column, it +will look into the converters dictionary and use the converter function +registered for that type there. Converter names are case-sensitive! +\end{datadesc} + + +\begin{datadesc}{PARSE_COLNAMES} +This constant is meant to be used with the \var{detect_types} parameter of the +\function{connect} function. + +Setting this makes the SQLite interface parse the column name for each column +it returns. It will look for a string formed [mytype] in there, and then +decide that 'mytype' is the type of the column. It will try to find an entry of +'mytype' in the converters dictionary and then use the converter function found +there to return the value. The column name found in \member{cursor.description} is only +the first word of the column name, i. e. if you use something like +\code{'as "x [datetime]"'} in your SQL, then we will parse out everything until the +first blank for the column name: the column name would simply be "x". +\end{datadesc} + +\begin{funcdesc}{connect}{database\optional{, timeout, isolation_level, detect_types, factory}} +Opens a connection to the SQLite database file \var{database}. 
You can use +\code{":memory:"} to open a database connection to a database that resides in +RAM instead of on disk. + +When a database is accessed by multiple connections, and one of the processes +modifies the database, the SQLite database is locked until that transaction is +committed. The \var{timeout} parameter specifies how long the connection should +wait for the lock to go away until raising an exception. The default for the +timeout parameter is 5.0 (five seconds). + +For the \var{isolation_level} parameter, please see \member{isolation_level} +\ref{sqlite3-Connection-IsolationLevel} property of \class{Connection} objects. + +SQLite natively supports only the types TEXT, INTEGER, REAL, BLOB and NULL. If +you want to use other types, you have to add support for them yourself. +The \var{detect_types} parameter and using custom \strong{converters} registered with +the module-level \function{register_converter} function allow you to easily do that. + +\var{detect_types} defaults to 0 (i. e. off, no type detection), you can set it +to any combination of \constant{PARSE_DECLTYPES} and \constant{PARSE_COLNAMES} to turn type +detection on. + +By default, the \module{sqlite3} module uses its \class{Connection} class for the +connect call. You can, however, subclass the \class{Connection} class and make +\function{connect} use your class instead by providing your class for the +\var{factory} parameter. + +Consult the section \ref{sqlite3-Types} of this manual for details. + +The \module{sqlite3} module internally uses a statement cache to avoid SQL parsing +overhead. If you want to explicitly set the number of statements that are +cached for the connection, you can set the \var{cached_statements} parameter. +The currently implemented default is to cache 100 statements. +\end{funcdesc} + +\begin{funcdesc}{register_converter}{typename, callable} +Registers a callable to convert a bytestring from the database into a custom +Python type. 
The callable will be invoked for all database values that are of +the type \var{typename}. Confer the parameter \var{detect_types} of the +\function{connect} function for how the type detection works. Note that the case of +\var{typename} and the name of the type in your query must match! +\end{funcdesc} + +\begin{funcdesc}{register_adapter}{type, callable} +Registers a callable to convert the custom Python type \var{type} into one of +SQLite's supported types. The callable \var{callable} accepts as single +parameter the Python value, and must return a value of the following types: +int, long, float, str (UTF-8 encoded), unicode or buffer. +\end{funcdesc} + +\begin{funcdesc}{complete_statement}{sql} +Returns \constant{True} if the string \var{sql} contains one or more complete SQL +statements terminated by semicolons. It does not verify if the SQL is +syntactically correct, only if there are no unclosed string literals and if the +statement is terminated by a semicolon. + +This can be used to build a shell for SQLite, like in the following example: + + \verbatiminput{sqlite3/complete_statement.py} +\end{funcdesc} + +\subsection{Connection Objects \label{sqlite3-Connection-Objects}} + +A \class{Connection} instance has the following attributes and methods: + +\label{sqlite3-Connection-IsolationLevel} +\begin{memberdesc}{isolation_level} + Get or set the current isolation level. None for autocommit mode or one of + "DEFERRED", "IMMEDIATE" or "EXCLUSIVE". See Controlling Transactions + \ref{sqlite3-Controlling-Transactions} for a more detailed explanation. +\end{memberdesc} + +\begin{methoddesc}{cursor}{\optional{cursorClass}} + The cursor method accepts a single optional parameter \var{cursorClass}. + This is a custom cursor class which must extend \class{sqlite3.Cursor}. 
+\end{methoddesc} + +\begin{methoddesc}{execute}{sql, \optional{parameters}} +This is a nonstandard shortcut that creates an intermediate cursor object by +calling the cursor method, then calls the cursor's \method{execute} method with the +parameters given. +\end{methoddesc} + +\begin{methoddesc}{executemany}{sql, \optional{parameters}} +This is a nonstandard shortcut that creates an intermediate cursor object by +calling the cursor method, then calls the cursor's \method{executemany} method with the +parameters given. +\end{methoddesc} + +\begin{methoddesc}{executescript}{sql_script} +This is a nonstandard shortcut that creates an intermediate cursor object by +calling the cursor method, then calls the cursor's \method{executescript} method with the +parameters given. +\end{methoddesc} + +\begin{methoddesc}{create_function}{name, num_params, func} + +Creates a user-defined function that you can later use from within SQL +statements under the function name \var{name}. \var{num_params} is the number +of parameters the function accepts, and \var{func} is a Python callable that is +called as SQL function. + +The function can return any of the types supported by SQLite: unicode, str, +int, long, float, buffer and None. Exceptions in the function are ignored and +they are handled as if the function returned None. + +Example: + + \verbatiminput{sqlite3/md5func.py} +\end{methoddesc} + +\begin{methoddesc}{create_aggregate}{name, num_params, aggregate_class} + +Creates a user-defined aggregate function. + +The aggregate class must implement a \code{step} method, which accepts the +number of parameters \var{num_params}, and a \code{finalize} method which +will return the final result of the aggregate. + +The \code{finalize} method can return any of the types supported by SQLite: +unicode, str, int, long, float, buffer and None. Any exceptions are ignored. 
+ +Example: + + \verbatiminput{sqlite3/mysumaggr.py} +\end{methoddesc} + +\begin{methoddesc}{create_collation}{name, callable} + +Creates a collation with the specified \var{name} and \var{callable}. The +callable will be passed two string arguments. It should return -1 if the first +is ordered lower than the second, 0 if they are ordered equal and 1 if the +first is ordered higher than the second. Note that this controls sorting +(ORDER BY in SQL) so your comparisons don't affect other SQL operations. + +Note that the callable will get its parameters as Python bytestrings, which +will normally be encoded in UTF-8. + +The following example shows a custom collation that sorts "the wrong way": + + \verbatiminput{sqlite3/collation_reverse.py} + +To remove a collation, call \code{create_collation} with None as callable: + +\begin{verbatim} + con.create_collation("reverse", None) +\end{verbatim} +\end{methoddesc} + + +\begin{memberdesc}{row_factory} + You can change this attribute to a callable that accepts the cursor and + the original row as tuple and will return the real result row. This + way, you can implement more advanced ways of returning results, like + ones that can also access columns by name. + + Example: + + \verbatiminput{sqlite3/row_factory.py} + + If the standard tuple types don't suffice for you, and you want name-based + access to columns, you should consider setting \member{row_factory} to the + highly-optimized sqlite3.Row type. It provides both + index-based and case-insensitive name-based access to columns with almost + no memory overhead. Much better than your own custom dictionary-based + approach or even a db_row based solution. +\end{memberdesc} + +\begin{memberdesc}{text_factory} + Using this attribute you can control what objects are returned for the + TEXT data type. By default, this attribute is set to \class{unicode} and + the \module{sqlite3} module will return Unicode objects for TEXT. 
If you want to return + bytestrings instead, you can set it to \class{str}. + + For efficiency reasons, there's also a way to return Unicode objects only + for non-ASCII data, and bytestrings otherwise. To activate it, set this + attribute to \constant{sqlite3.OptimizedUnicode}. + + You can also set it to any other callable that accepts a single bytestring + parameter and returns the result object. + + See the following example code for illustration: + + \verbatiminput{sqlite3/text_factory.py} +\end{memberdesc} + +\begin{memberdesc}{total_changes} + Returns the total number of database rows that have been modified, inserted, + or deleted since the database connection was opened. +\end{memberdesc} + + + + + +\subsection{Cursor Objects \label{sqlite3-Cursor-Objects}} + +A \class{Cursor} instance has the following attributes and methods: + +\begin{methoddesc}{execute}{sql, \optional{parameters}} + +Executes a SQL statement. The SQL statement may be parametrized (i. e. +placeholders instead of SQL literals). The \module{sqlite3} module supports two kinds of +placeholders: question marks (qmark style) and named placeholders (named +style). + +This example shows how to use parameters with qmark style: + + \verbatiminput{sqlite3/execute_1.py} + +This example shows how to use the named style: + + \verbatiminput{sqlite3/execute_2.py} + + \method{execute} will only execute a single SQL statement. If you try to + execute more than one statement with it, it will raise a Warning. Use + \method{executescript} if you want to execute multiple SQL statements with one + call. +\end{methoddesc} + + +\begin{methoddesc}{executemany}{sql, seq_of_parameters} +Executes a SQL command against all parameter sequences or mappings found in the +sequence \var{seq_of_parameters}. The \module{sqlite3} module also allows +using an iterator yielding parameters instead of a sequence. 
+ +\verbatiminput{sqlite3/executemany_1.py} + +Here's a shorter example using a generator: + +\verbatiminput{sqlite3/executemany_2.py} +\end{methoddesc} + +\begin{methoddesc}{executescript}{sql_script} + +This is a nonstandard convenience method for executing multiple SQL statements +at once. It issues a COMMIT statement before, then executes the SQL script it +gets as a parameter. + +\var{sql_script} can be a bytestring or a Unicode string. + +Example: + +\verbatiminput{sqlite3/executescript.py} +\end{methoddesc} + +\begin{memberdesc}{rowcount} + Although the \class{Cursor} class of the \module{sqlite3} module implements this + attribute, the database engine's own support for the determination of "rows + affected"/"rows selected" is quirky. + + For \code{SELECT} statements, \member{rowcount} is always None because we cannot + determine the number of rows a query produced until all rows were fetched. + + For \code{DELETE} statements, SQLite reports \member{rowcount} as 0 if you make a + \code{DELETE FROM table} without any condition. + + For \method{executemany} statements, the number of modifications are summed + up into \member{rowcount}. + + As required by the Python DB API Spec, the \member{rowcount} attribute "is -1 + in case no executeXX() has been performed on the cursor or the rowcount + of the last operation is not determinable by the interface". +\end{memberdesc} + +\subsection{SQLite and Python types\label{sqlite3-Types}} + +\subsubsection{Introduction} + +SQLite natively supports the following types: NULL, INTEGER, REAL, TEXT, BLOB. 
+ +The following Python types can thus be sent to SQLite without any problem: + +\begin{tableii} {c|l}{code}{Python type}{SQLite type} +\lineii{None}{NULL} +\lineii{int}{INTEGER} +\lineii{long}{INTEGER} +\lineii{float}{REAL} +\lineii{str (UTF8-encoded)}{TEXT} +\lineii{unicode}{TEXT} +\lineii{buffer}{BLOB} +\end{tableii} + +This is how SQLite types are converted to Python types by default: + +\begin{tableii} {c|l}{code}{SQLite type}{Python type} +\lineii{NULL}{None} +\lineii{INTEGER}{int or long, depending on size} +\lineii{REAL}{float} +\lineii{TEXT}{depends on text_factory, unicode by default} +\lineii{BLOB}{buffer} +\end{tableii} + +The type system of the \module{sqlite3} module is extensible in both ways: you can store +additional Python types in a SQLite database via object adaptation, and you can +let the \module{sqlite3} module convert SQLite types to different Python types via +converters. + +\subsubsection{Using adapters to store additional Python types in SQLite databases} + +Like described before, SQLite supports only a limited set of types natively. To +use other Python types with SQLite, you must \strong{adapt} them to one of the sqlite3 +module's supported types for SQLite. So, one of NoneType, int, long, float, +str, unicode, buffer. + +The \module{sqlite3} module uses the Python object adaptation, like described in PEP 246 +for this. The protocol to use is \class{PrepareProtocol}. + +There are two ways to enable the \module{sqlite3} module to adapt a custom Python type +to one of the supported ones. + +\paragraph{Letting your object adapt itself} + +This is a good approach if you write the class yourself. Let's suppose you have +a class like this: + +\begin{verbatim} +class Point(object): + def __init__(self, x, y): + self.x, self.y = x, y +\end{verbatim} + +Now you want to store the point in a single SQLite column. You'll have to +choose one of the supported types first that you use to represent the point in. 
+Let's just use str and separate the coordinates using a semicolon. Then you +need to give your class a method \code{__conform__(self, protocol)} which must +return the converted value. The parameter \var{protocol} will be +\class{PrepareProtocol}. + +\verbatiminput{sqlite3/adapter_point_1.py} + +\paragraph{Registering an adapter callable} + +The other possibility is to create a function that converts the type to the +string representation and register the function with \method{register_adapter}. + + \verbatiminput{sqlite3/adapter_point_2.py} + +\begin{notice} +The type/class to adapt must be a new-style class, i. e. it must have +\class{object} as one of its bases. +\end{notice} + +The \module{sqlite3} module has two default adapters for Python's builtin +\class{datetime.date} and \class{datetime.datetime} types. Now let's suppose we +want to store \class{datetime.datetime} objects not in ISO representation, but +as Unix timestamp. + + \verbatiminput{sqlite3/adapter_datetime.py} + +\subsubsection{Converting SQLite values to custom Python types} + +Now that's all nice and dandy that you can send custom Python types to SQLite. +But to make it really useful we need to make the Python to SQLite to Python +roundtrip work. + +Enter converters. + +Let's go back to the Point class. We stored the x and y coordinates separated +via semicolons as strings in SQLite. + +Let's first define a converter function that accepts the string as a parameter and constructs a Point object from it. + +\begin{notice} +Converter functions \strong{always} get called with a string, no matter +under which data type you sent the value to SQLite. +\end{notice} + +\begin{notice} +Converter names are looked up in a case-sensitive manner. +\end{notice} + + +\begin{verbatim} + def convert_point(s): + x, y = map(float, s.split(";")) + return Point(x, y) +\end{verbatim} + +Now you need to make the \module{sqlite3} module know that what you select from the +database is actually a point. 
There are two ways of doing this: + +\begin{itemize} + \item Implicitly via the declared type + \item Explicitly via the column name +\end{itemize} + +Both ways are described at \ref{sqlite3-Module-Contents} in the text explaining +the constants \constant{PARSE_DECLTYPES} and \constant{PARSE_COLNAMES}. + + +The following example illustrates both ways. + + \verbatiminput{sqlite3/converter_point.py} + +\subsubsection{Default adapters and converters} + +There are default adapters for the date and datetime types in the datetime +module. They will be sent as ISO dates/ISO timestamps to SQLite. + +The default converters are registered under the name "date" for datetime.date +and under the name "timestamp" for datetime.datetime. + +This way, you can use date/timestamps from Python without any additional +fiddling in most cases. The format of the adapters is also compatible with the +experimental SQLite date/time functions. + +The following example demonstrates this. + + \verbatiminput{sqlite3/pysqlite_datetime.py} + +\subsection{Controlling Transactions \label{sqlite3-Controlling-Transactions}} + +By default, the \module{sqlite3} module opens transactions implicitly before a DML +statement (INSERT/UPDATE/DELETE/REPLACE), and commits transactions implicitly +before a non-DML, non-DQL statement (i. e. anything other than +SELECT/INSERT/UPDATE/DELETE/REPLACE). + +So if you are within a transaction, and issue a command like \code{CREATE TABLE +...}, \code{VACUUM}, \code{PRAGMA}, the \module{sqlite3} module will commit implicitly +before executing that command. There are two reasons for doing that. The first +is that some of these commands don't work within transactions. The other reason +is that pysqlite needs to keep track of the transaction state (if a transaction +is active or not). 
+ +You can control which kind of "BEGIN" statements pysqlite implicitly executes +(or none at all) via the \var{isolation_level} parameter to the +\function{connect} call, or via the \member{isolation_level} property of +connections. + +If you want \strong{autocommit mode}, then set \member{isolation_level} to None. + +Otherwise leave it at its default, which will result in a plain "BEGIN" +statement, or set it to one of SQLite's supported isolation levels: DEFERRED, +IMMEDIATE or EXCLUSIVE. + +As the \module{sqlite3} module needs to keep track of the transaction state, you should +not use \code{OR ROLLBACK} or \code{ON CONFLICT ROLLBACK} in your SQL. Instead, +catch the \exception{IntegrityError} and call the \method{rollback} method of +the connection yourself. + +\subsection{Using pysqlite efficiently} + +\subsubsection{Using shortcut methods} + +Using the nonstandard \method{execute}, \method{executemany} and +\method{executescript} methods of the \class{Connection} object, your code can +be written more concisely, because you don't have to create the (often +superfluous) \class{Cursor} objects explicitly. Instead, the \class{Cursor} +objects are created implicitly and these shortcut methods return the cursor +objects. This way, you can for example execute a SELECT statement and iterate +over it directly using only a single call on the \class{Connection} object. + + \verbatiminput{sqlite3/shortcut_methods.py} + +\subsubsection{Accessing columns by name instead of by index} + +One cool feature of the \module{sqlite3} module is the builtin \class{sqlite3.Row} class +designed to be used as a row factory. 
+ +Rows wrapped with this class can be accessed both by index (like tuples) and +case-insensitively by name: + + \verbatiminput{sqlite3/rowclass.py} + + diff --git a/Doc/lib/libstdtypes.tex b/Doc/lib/libstdtypes.tex index 8d011fd..f44360b 100644 --- a/Doc/lib/libstdtypes.tex +++ b/Doc/lib/libstdtypes.tex @@ -1,12 +1,11 @@ \section{Built-in Types \label{types}} The following sections describe the standard types that are built into -the interpreter. Historically, Python's built-in types have differed -from user-defined types because it was not possible to use the built-in -types as the basis for object-oriented inheritance. With the 2.2 -release this situation has started to change, although the intended -unification of user-defined and built-in types is as yet far from -complete. +the interpreter. +\note{Historically (until release 2.2), Python's built-in types have +differed from user-defined types because it was not possible to use +the built-in types as the basis for object-oriented inheritance. +This limitation does not exist any longer.} The principal built-in types are numerics, sequences, mappings, files classes, instances and exceptions. @@ -19,7 +18,7 @@ the equivalent \function{repr()} function, or the slightly different \function{str()} function). The latter function is implicitly used when an object is written by the \keyword{print}\stindex{print} statement. -(Information on \ulink{\keyword{print} statement}{../ref/print.html} +(Information on the \ulink{\keyword{print} statement}{../ref/print.html} and other language statements can be found in the \citetitle[../ref/ref.html]{Python Reference Manual} and the \citetitle[../tut/tut.html]{Python Tutorial}.) 
@@ -728,6 +727,15 @@ a prefix; rather, all combinations of its values are stripped: \versionchanged[Support for the \var{chars} argument]{2.2.2} \end{methoddesc} +\begin{methoddesc}[string]{partition}{sep} +Split the string at the first occurrence of \var{sep}, and return +a 3-tuple containing the part before the separator, the separator +itself, and the part after the separator. If the separator is not +found, return a 3-tuple containing the string itself, followed by +two empty strings. +\versionadded{2.5} +\end{methoddesc} + \begin{methoddesc}[string]{replace}{old, new\optional{, count}} Return a copy of the string with all occurrences of substring \var{old} replaced by \var{new}. If the optional argument @@ -755,6 +763,15 @@ The original string is returned if \versionchanged[Support for the \var{fillchar} argument]{2.4} \end{methoddesc} +\begin{methoddesc}[string]{rpartition}{sep} +Split the string at the last occurrence of \var{sep}, and return +a 3-tuple containing the part before the separator, the separator +itself, and the part after the separator. If the separator is not +found, return a 3-tuple containing the string itself, followed by +two empty strings. +\versionadded{2.5} +\end{methoddesc} + \begin{methoddesc}[string]{rsplit}{\optional{sep \optional{,maxsplit}}} Return a list of the words in the string, using \var{sep} as the delimiter string. 
If \var{maxsplit} is given, at most \var{maxsplit} @@ -971,20 +988,22 @@ The conversion types are: \lineiii{u}{Unsigned decimal.}{} \lineiii{x}{Unsigned hexadecimal (lowercase).}{(2)} \lineiii{X}{Unsigned hexadecimal (uppercase).}{(2)} - \lineiii{e}{Floating point exponential format (lowercase).}{} - \lineiii{E}{Floating point exponential format (uppercase).}{} - \lineiii{f}{Floating point decimal format.}{} - \lineiii{F}{Floating point decimal format.}{} - \lineiii{g}{Same as \character{e} if exponent is greater than -4 or - less than precision, \character{f} otherwise.}{} - \lineiii{G}{Same as \character{E} if exponent is greater than -4 or - less than precision, \character{F} otherwise.}{} + \lineiii{e}{Floating point exponential format (lowercase).}{(3)} + \lineiii{E}{Floating point exponential format (uppercase).}{(3)} + \lineiii{f}{Floating point decimal format.}{(3)} + \lineiii{F}{Floating point decimal format.}{(3)} + \lineiii{g}{Floating point format. Uses exponential format + if exponent is greater than -4 or less than precision, + decimal format otherwise.}{(4)} + \lineiii{G}{Floating point format. Uses exponential format + if exponent is greater than -4 or less than precision, + decimal format otherwise.}{(4)} \lineiii{c}{Single character (accepts integer or single character string).}{} \lineiii{r}{String (converts any python object using - \function{repr()}).}{(3)} + \function{repr()}).}{(5)} \lineiii{s}{String (converts any python object using - \function{str()}).}{(4)} + \function{str()}).}{(6)} \lineiii{\%}{No argument is converted, results in a \character{\%} character in the result.}{} \end{tableiii} @@ -1004,10 +1023,27 @@ Notes: formatting of the number if the leading character of the result is not already a zero. \item[(3)] - The \code{\%r} conversion was added in Python 2.0. + The alternate form causes the result to always contain a decimal + point, even if no digits follow it. 
+ + The precision determines the number of digits after the decimal + point and defaults to 6. \item[(4)] + The alternate form causes the result to always contain a decimal + point, and trailing zeroes are not removed as they would + otherwise be. + + The precision determines the number of significant digits before + and after the decimal point and defaults to 6. + \item[(5)] + The \code{\%r} conversion was added in Python 2.0. + + The precision determines the maximal number of characters used. + \item[(6)] If the object or format provided is a \class{unicode} string, the resulting string will also be \class{unicode}. + + The precision determines the maximal number of characters used. \end{description} % XXX Examples? @@ -1747,6 +1783,87 @@ implemented in C will have to provide a writable \end{memberdesc} +\subsection{Context Manager Types \label{typecontextmanager}} + +\versionadded{2.5} +\index{context manager} +\index{context management protocol} +\index{protocol!context management} + +Python's \keyword{with} statement supports the concept of a runtime +context defined by a context manager. This is implemented using +two separate methods that allow user-defined classes to define +a runtime context that is entered before the statement body is +executed and exited when the statement ends. + +The \dfn{context management protocol} consists of a pair of +methods that need to be provided for a context manager object to +define a runtime context: + +\begin{methoddesc}[context manager]{__enter__}{} + Enter the runtime context and return either this object or another + object related to the runtime context. The value returned by this + method is bound to the identifier in the \keyword{as} clause of + \keyword{with} statements using this context manager. + + An example of a context manager that returns itself is a file object. 
+ File objects return themselves from \method{__enter__()} to allow + \function{open()} to be used as the context expression in a + \keyword{with} statement. + + An example of a context manager that returns a related + object is the one returned by \code{decimal.Context.get_manager()}. + These managers set the active decimal context to a copy of the + original decimal context and then return the copy. This allows + changes to be made to the current decimal context in the body of + the \keyword{with} statement without affecting code outside + the \keyword{with} statement. +\end{methoddesc} + +\begin{methoddesc}[context manager]{__exit__}{exc_type, exc_val, exc_tb} + Exit the runtime context and return a Boolean flag indicating if any + exception that occurred should be suppressed. If an exception + occurred while executing the body of the \keyword{with} statement, the + arguments contain the exception type, value and traceback information. + Otherwise, all three arguments are \code{None}. + + Returning a true value from this method will cause the \keyword{with} + statement to suppress the exception and continue execution with the + statement immediately following the \keyword{with} statement. Otherwise + the exception continues propagating after this method has finished + executing. Exceptions that occur during execution of this method will + replace any exception that occurred in the body of the \keyword{with} + statement. + + The exception passed in should never be reraised explicitly - instead, + this method should return a false value to indicate that the method + completed successfully and does not want to suppress the raised + exception. This allows context management code (such as + \code{contextlib.nested}) to easily detect whether or not an + \method{__exit__()} method has actually failed.
+\end{methoddesc} + +Python defines several context managers to support easy thread +synchronisation, prompt closure of files or other objects, and +simpler manipulation of the active decimal arithmetic +context. The specific types are not treated specially beyond +their implementation of the context management protocol. + +Python's generators and the \code{contextlib.contextfactory} decorator +provide a convenient way to implement these protocols. If a generator +function is decorated with the \code{contextlib.contextfactory} +decorator, it will return a context manager implementing the necessary +\method{__enter__()} and \method{__exit__()} methods, rather than the +iterator produced by an undecorated generator function. + +Note that there is no specific slot for any of these methods in the +type structure for Python objects in the Python/C API. Extension +types wanting to define these methods must provide them as a normal +Python accessible method. Compared to the overhead of setting up the +runtime context, the overhead of a single class dictionary lookup +is negligible. + + \subsection{Other Built-in Types \label{typesother}} The interpreter supports several other kinds of objects. diff --git a/Doc/lib/libsubprocess.tex b/Doc/lib/libsubprocess.tex index 4417797..bde92eb 100644 --- a/Doc/lib/libsubprocess.tex +++ b/Doc/lib/libsubprocess.tex @@ -70,10 +70,10 @@ value for \var{bufsize} is \constant{0} (unbuffered). The \var{executable} argument specifies the program to execute. It is very seldom needed: Usually, the program to execute is defined by the -\var{args} argument. If \var{shell=True}, the \var{executable} +\var{args} argument. If \code{shell=True}, the \var{executable} argument specifies which shell to use. On \UNIX{}, the default shell -is /bin/sh. On Windows, the default shell is specified by the COMSPEC -environment variable. +is \file{/bin/sh}. On Windows, the default shell is specified by the +\envvar{COMSPEC} environment variable. 
\var{stdin}, \var{stdout} and \var{stderr} specify the executed programs' standard input, standard output and standard error file @@ -88,16 +88,19 @@ handle as for stdout. If \var{preexec_fn} is set to a callable object, this object will be called in the child process just before the child is executed. +(\UNIX{} only) If \var{close_fds} is true, all file descriptors except \constant{0}, \constant{1} and \constant{2} will be closed before the child process is -executed. +executed. (\UNIX{} only) If \var{shell} is \constant{True}, the specified command will be executed through the shell. -If \var{cwd} is not \code{None}, the current directory will be changed -to cwd before the child is executed. +If \var{cwd} is not \code{None}, the child's current directory will be +changed to \var{cwd} before it is executed. Note that this directory +is not considered when searching the executable, so you can't specify +the program's path relative to \var{cwd}. If \var{env} is not \code{None}, it defines the environment variables for the new process. diff --git a/Doc/lib/libsys.tex b/Doc/lib/libsys.tex index 1a57da4..6b5b755 100644 --- a/Doc/lib/libsys.tex +++ b/Doc/lib/libsys.tex @@ -410,7 +410,7 @@ else: Strings specifying the primary and secondary prompt of the interpreter. These are only defined if the interpreter is in interactive mode. Their initial values in this case are - \code{'>\code{>}> '} and \code{'... '}. If a non-string object is + \code{'>>>~'} and \code{'...~'}. If a non-string object is assigned to either variable, its \function{str()} is re-evaluated each time the interpreter prepares to read a new interactive command; this can be used to implement a dynamic prompt. 
diff --git a/Doc/lib/libtarfile.tex b/Doc/lib/libtarfile.tex index f705e8d..ca6e65a 100644 --- a/Doc/lib/libtarfile.tex +++ b/Doc/lib/libtarfile.tex @@ -128,7 +128,7 @@ Some facts and figures: \seemodule{zipfile}{Documentation of the \refmodule{zipfile} standard module.} - \seetitle[http://www.gnu.org/software/tar/manual/html_chapter/tar_8.html\#SEC134] + \seetitle[http://www.gnu.org/software/tar/manual/html_node/tar_134.html\#SEC134] {GNU tar manual, Basic Tar Format}{Documentation for tar archive files, including GNU tar extensions.} \end{seealso} @@ -334,8 +334,12 @@ the file's data itself. Create and return a \class{TarInfo} object from a string buffer. \end{methoddesc} -\begin{methoddesc}{tobuf}{} +\begin{methoddesc}{tobuf}{posix} Create a string buffer from a \class{TarInfo} object. + See \class{TarFile}'s \member{posix} attribute for information + on the \var{posix} argument. It defaults to \constant{False}. + + \versionadded[The \var{posix} parameter]{2.5} \end{methoddesc} A \code{TarInfo} object has the following public data attributes: diff --git a/Doc/lib/libthread.tex b/Doc/lib/libthread.tex index 9e0c202..9573ab3 100644 --- a/Doc/lib/libthread.tex +++ b/Doc/lib/libthread.tex @@ -44,8 +44,8 @@ then the thread exits (but other threads continue to run). \end{funcdesc} \begin{funcdesc}{interrupt_main}{} -Raise a KeyboardInterrupt in the main thread. A subthread can use this -function to interrupt the main thread. +Raise a \exception{KeyboardInterrupt} exception in the main thread. A subthread +can use this function to interrupt the main thread. \versionadded{2.3} \end{funcdesc} diff --git a/Doc/lib/libtokenize.tex b/Doc/lib/libtokenize.tex index cdbb4b8..8c9ad3e 100644 --- a/Doc/lib/libtokenize.tex +++ b/Doc/lib/libtokenize.tex @@ -47,7 +47,7 @@ An older entry point is retained for backward compatibility: call to the function should return one line of input as a string. 
Alternately, \var{readline} may be a callable object that signals completion by raising \exception{StopIteration}. - \versionchanged[Added StopIteration support]{2.5} + \versionchanged[Added \exception{StopIteration} support]{2.5} The second parameter, \var{tokeneater}, must also be a callable object. It is called once for each token, with five arguments, diff --git a/Doc/lib/libtrace.tex b/Doc/lib/libtrace.tex new file mode 100644 index 0000000..2465aac --- /dev/null +++ b/Doc/lib/libtrace.tex @@ -0,0 +1,125 @@ +\section{\module{trace} --- + Trace or track Python statement execution} + +\declaremodule{standard}{trace} +\modulesynopsis{Trace or track Python statement execution.} + +The \module{trace} module allows you to trace program execution, generate +annotated statement coverage listings, print caller/callee relationships and +list functions executed during a program run. It can be used in another +program or from the command line. + +\subsection{Command Line Usage\label{trace-cli}} + +The \module{trace} module can be invoked from the command line. It can be +as simple as + +\begin{verbatim} +python -m trace --count somefile.py ... +\end{verbatim} + +The above will generate annotated listings of all Python modules imported +during the execution of \file{somefile.py}. + +The following command-line arguments are supported: + +\begin{description} +\item[\longprogramopt{trace}, \programopt{-t}] +Display lines as they are executed. + +\item[\longprogramopt{count}, \programopt{-c}] +Produce a set of annotated listing files upon program +completion that shows how many times each statement was executed. + +\item[\longprogramopt{report}, \programopt{-r}] +Produce an annotated list from an earlier program run that +used the \longprogramopt{count} and \longprogramopt{file} arguments. + +\item[\longprogramopt{no-report}, \programopt{-R}] +Do not generate annotated listings. 
This is useful if you intend to make +several runs with \longprogramopt{count} then produce a single set +of annotated listings at the end. + +\item[\longprogramopt{listfuncs}, \programopt{-l}] +List the functions executed by running the program. + +\item[\longprogramopt{trackcalls}, \programopt{-T}] +Generate calling relationships exposed by running the program. + +\item[\longprogramopt{file}, \programopt{-f}] +Name a file containing (or to contain) counts. + +\item[\longprogramopt{coverdir}, \programopt{-C}] +Name a directory in which to save annotated listing files. + +\item[\longprogramopt{missing}, \programopt{-m}] +When generating annotated listings, mark lines which +were not executed with `\code{>>>>>>}'. + +\item[\longprogramopt{summary}, \programopt{-s}] +When using \longprogramopt{count} or \longprogramopt{report}, write a +brief summary to stdout for each file processed. + +\item[\longprogramopt{ignore-module}] +Ignore the named module and its submodules (if it is +a package). May be given multiple times. + +\item[\longprogramopt{ignore-dir}] +Ignore all modules and packages in the named directory +and subdirectories. May be given multiple times. +\end{description} + +\subsection{Programming Interface\label{trace-api}} + +\begin{classdesc}{Trace}{\optional{count=1\optional{, trace=1\optional{, + countfuncs=0\optional{, countcallers=0\optional{, + ignoremods=()\optional{, ignoredirs=()\optional{, + infile=None\optional{, outfile=None}}}}}}}}} +Create an object to trace execution of a single statement or expression. +All parameters are optional. \var{count} enables counting of line numbers. +\var{trace} enables line execution tracing. \var{countfuncs} enables +listing of the functions called during the run. \var{countcallers} enables +call relationship tracking. \var{ignoremods} is a list of modules or +packages to ignore. \var{ignoredirs} is a list of directories whose modules +or packages should be ignored. 
\var{infile} is the file from which to read +stored count information. \var{outfile} is a file in which to write updated +count information. +\end{classdesc} + +\begin{methoddesc}[Trace]{run}{cmd} +Run \var{cmd} under control of the Trace object with the current tracing +parameters. +\end{methoddesc} + +\begin{methoddesc}[Trace]{runctx}{cmd\optional{, globals=None\optional{, + locals=None}}} +Run \var{cmd} under control of the Trace object with the current tracing +parameters in the defined global and local environments. If not defined, +\var{globals} and \var{locals} default to empty dictionaries. +\end{methoddesc} + +\begin{methoddesc}[Trace]{runfunc}{func, *args, **kwds} +Call \var{func} with the given arguments under control of the +\class{Trace} object with the current tracing parameters. +\end{methoddesc} + +This is a simple example showing the use of this module: + +\begin{verbatim} +import sys +import trace + +# create a Trace object, telling it what to ignore, and whether to +# do tracing or line-counting or both. +tracer = trace.Trace( + ignoredirs=[sys.prefix, sys.exec_prefix], + trace=0, + count=1) + +# run the new command using the given tracer +tracer.run('main()') + +# make a report, placing output in /tmp +r = tracer.results() +r.write_results(show_missing=True, coverdir="/tmp") +\end{verbatim} diff --git a/Doc/lib/libunittest.tex b/Doc/lib/libunittest.tex index 6c8769d..51b321e 100644 --- a/Doc/lib/libunittest.tex +++ b/Doc/lib/libunittest.tex @@ -226,7 +226,7 @@ runs, an exception will be raised, and the testing framework will identify the test case as a \dfn{failure}. Other exceptions that do not arise from checks made through the \method{assert*()} and \method{fail*()} methods are identified by the testing framework as -dfn{errors}. +\dfn{errors}. The way to run a test case will be described later. 
For now, note that to construct an instance of such a test case, we call its diff --git a/Doc/lib/liburllib2.tex b/Doc/lib/liburllib2.tex index e0c4568..7c8ad5d 100644 --- a/Doc/lib/liburllib2.tex +++ b/Doc/lib/liburllib2.tex @@ -621,14 +621,20 @@ user/password. \subsection{AbstractBasicAuthHandler Objects \label{abstract-basic-auth-handler}} -\begin{methoddesc}[AbstractBasicAuthHandler]{handle_authentication_request} +\begin{methoddesc}[AbstractBasicAuthHandler]{http_error_auth_reqed} {authreq, host, req, headers} Handle an authentication request by getting a user/password pair, and re-trying the request. \var{authreq} should be the name of the header where the information about the realm is included in the request, -\var{host} is the host to authenticate to, \var{req} should be the -(failed) \class{Request} object, and \var{headers} should be the error -headers. +\var{host} specifies the URL and path to authenticate for, \var{req} +should be the (failed) \class{Request} object, and \var{headers} +should be the error headers. + +\var{host} is either an authority (e.g. \code{"python.org"}) or a URL +containing an authority component (e.g. \code{"http://python.org/"}). +In either case, the authority must not contain a userinfo component +(so, \code{"python.org"} and \code{"python.org:80"} are fine, +\code{"joe:password@python.org"} is not). \end{methoddesc} @@ -653,7 +659,7 @@ Retry the request with authentication information, if available. 
\subsection{AbstractDigestAuthHandler Objects \label{abstract-digest-auth-handler}} -\begin{methoddesc}[AbstractDigestAuthHandler]{handle_authentication_request} +\begin{methoddesc}[AbstractDigestAuthHandler]{http_error_auth_reqed} {authreq, host, req, headers} \var{authreq} should be the name of the header where the information about the realm is included in the request, \var{host} should be the host to diff --git a/Doc/lib/libweakref.tex b/Doc/lib/libweakref.tex index 840b674..fc949e6 100644 --- a/Doc/lib/libweakref.tex +++ b/Doc/lib/libweakref.tex @@ -147,6 +147,24 @@ information. to vanish "by magic" (as a side effect of garbage collection).} \end{classdesc} +\class{WeakKeyDictionary} objects have the following additional +methods. These expose the internal references directly. The +references are not guaranteed to be ``live'' at the time they are +used, so the result of calling the references needs to be checked +before being used. This can be used to avoid creating references that +will cause the garbage collector to keep the keys around longer than +needed. + +\begin{methoddesc}{iterkeyrefs}{} + Return an iterator that yields the weak references to the keys. + \versionadded{2.5} +\end{methoddesc} + +\begin{methoddesc}{keyrefs}{} + Return a list of weak references to the keys. + \versionadded{2.5} +\end{methoddesc} + \begin{classdesc}{WeakValueDictionary}{\optional{dict}} Mapping class that references values weakly. Entries in the dictionary will be discarded when no strong reference to the value @@ -160,6 +178,21 @@ information. to vanish "by magic" (as a side effect of garbage collection).} \end{classdesc} +\class{WeakValueDictionary} objects have the following additional +methods. These methods have the same issues as the +\method{iterkeyrefs()} and \method{keyrefs()} methods of +\class{WeakKeyDictionary} objects. + +\begin{methoddesc}{itervaluerefs}{} + Return an iterator that yields the weak references to the values.
+ \versionadded{2.5} +\end{methoddesc} + +\begin{methoddesc}{valuerefs}{} + Return a list of weak references to the values. + \versionadded{2.5} +\end{methoddesc} + \begin{datadesc}{ReferenceType} The type object for weak references objects. \end{datadesc} diff --git a/Doc/lib/libxmlrpclib.tex b/Doc/lib/libxmlrpclib.tex index 1c36f99..3645b82 100644 --- a/Doc/lib/libxmlrpclib.tex +++ b/Doc/lib/libxmlrpclib.tex @@ -81,9 +81,11 @@ Python type): This is the full set of data types supported by XML-RPC. Method calls may also raise a special \exception{Fault} instance, used to signal XML-RPC server errors, or \exception{ProtocolError} used to signal an -error in the HTTP/HTTPS transport layer. Note that even though starting -with Python 2.2 you can subclass builtin types, the xmlrpclib module -currently does not marshal instances of such subclasses. +error in the HTTP/HTTPS transport layer. Both \exception{Fault} and +\exception{ProtocolError} derive from a base class called +\exception{Error}. Note that even though starting with Python 2.2 you +can subclass builtin types, the xmlrpclib module currently does not +marshal instances of such subclasses. When passing strings, characters special to XML such as \samp{<}, \samp{>}, and \samp{\&} will be automatically escaped. However, it's @@ -340,6 +342,7 @@ objects, they are converted to \class{DateTime} objects internally, so only \begin{verbatim} # simple test program (from the XML-RPC specification) +from xmlrpclib import ServerProxy, Error # server = ServerProxy("http://localhost:8000") # local server server = ServerProxy("http://betty.userland.com") diff --git a/Doc/lib/libzlib.tex b/Doc/lib/libzlib.tex index dfbb43d..876f8c0 100644 --- a/Doc/lib/libzlib.tex +++ b/Doc/lib/libzlib.tex @@ -123,6 +123,12 @@ prevents compressing any more data. After calling action is to delete the object. \end{methoddesc} +\begin{methoddesc}[Compress]{copy}{} +Returns a copy of the compression object. 
This can be used to efficiently +compress a set of data that share a common initial prefix. +\versionadded{2.5} +\end{methoddesc} + Decompression objects support the following methods, and two attributes: \begin{memberdesc}{unused_data} @@ -176,6 +182,13 @@ The optional parameter \var{length} sets the initial size of the output buffer. \end{methoddesc} +\begin{methoddesc}[Decompress]{copy}{} +Returns a copy of the decompression object. This can be used to save the +state of the decompressor midway through the data stream in order to speed up +random seeks into the stream at a future point. +\versionadded{2.5} +\end{methoddesc} + \begin{seealso} \seemodule{gzip}{Reading and writing \program{gzip}-format files.} \seeurl{http://www.zlib.net}{The zlib library home page.} diff --git a/Doc/lib/sqlite3/adapter_datetime.py b/Doc/lib/sqlite3/adapter_datetime.py new file mode 100644 index 0000000..3460498 --- /dev/null +++ b/Doc/lib/sqlite3/adapter_datetime.py @@ -0,0 +1,14 @@ +import sqlite3 +import datetime, time + +def adapt_datetime(ts): + return time.mktime(ts.timetuple()) + +sqlite3.register_adapter(datetime.datetime, adapt_datetime) + +con = sqlite3.connect(":memory:") +cur = con.cursor() + +now = datetime.datetime.now() +cur.execute("select ?", (now,)) +print cur.fetchone()[0] diff --git a/Doc/lib/sqlite3/adapter_point_1.py b/Doc/lib/sqlite3/adapter_point_1.py new file mode 100644 index 0000000..a741f6c --- /dev/null +++ b/Doc/lib/sqlite3/adapter_point_1.py @@ -0,0 +1,16 @@ +import sqlite3 + +class Point(object): + def __init__(self, x, y): + self.x, self.y = x, y + + def __conform__(self, protocol): + if protocol is sqlite3.PrepareProtocol: + return "%f;%f" % (self.x, self.y) + +con = sqlite3.connect(":memory:") +cur = con.cursor() + +p = Point(4.0, -3.2) +cur.execute("select ?", (p,)) +print cur.fetchone()[0] diff --git a/Doc/lib/sqlite3/adapter_point_2.py b/Doc/lib/sqlite3/adapter_point_2.py new file mode 100644 index 0000000..200a064 --- /dev/null +++ 
b/Doc/lib/sqlite3/adapter_point_2.py @@ -0,0 +1,17 @@ +import sqlite3 + +class Point(object): + def __init__(self, x, y): + self.x, self.y = x, y + +def adapt_point(point): + return "%f;%f" % (point.x, point.y) + +sqlite3.register_adapter(Point, adapt_point) + +con = sqlite3.connect(":memory:") +cur = con.cursor() + +p = Point(4.0, -3.2) +cur.execute("select ?", (p,)) +print cur.fetchone()[0] diff --git a/Doc/lib/sqlite3/collation_reverse.py b/Doc/lib/sqlite3/collation_reverse.py new file mode 100644 index 0000000..e956402 --- /dev/null +++ b/Doc/lib/sqlite3/collation_reverse.py @@ -0,0 +1,15 @@ +import sqlite3 + +def collate_reverse(string1, string2): + return -cmp(string1, string2) + +con = sqlite3.connect(":memory:") +con.create_collation("reverse", collate_reverse) + +cur = con.cursor() +cur.execute("create table test(x)") +cur.executemany("insert into test(x) values (?)", [("a",), ("b",)]) +cur.execute("select x from test order by x collate reverse") +for row in cur: + print row +con.close() diff --git a/Doc/lib/sqlite3/complete_statement.py b/Doc/lib/sqlite3/complete_statement.py new file mode 100644 index 0000000..89fc250 --- /dev/null +++ b/Doc/lib/sqlite3/complete_statement.py @@ -0,0 +1,30 @@ +# A minimal SQLite shell for experiments + +import sqlite3 + +con = sqlite3.connect(":memory:") +con.isolation_level = None +cur = con.cursor() + +buffer = "" + +print "Enter your SQL commands to execute in sqlite3." +print "Enter a blank line to exit." 
+ +while True: + line = raw_input() + if line == "": + break + buffer += line + if sqlite3.complete_statement(buffer): + try: + buffer = buffer.strip() + cur.execute(buffer) + + if buffer.lstrip().upper().startswith("SELECT"): + print cur.fetchall() + except sqlite3.Error, e: + print "An error occurred:", e.args[0] + buffer = "" + +con.close() diff --git a/Doc/lib/sqlite3/connect_db_1.py b/Doc/lib/sqlite3/connect_db_1.py new file mode 100644 index 0000000..1b97523 --- /dev/null +++ b/Doc/lib/sqlite3/connect_db_1.py @@ -0,0 +1,3 @@ +import sqlite3 + +con = sqlite3.connect("mydb") diff --git a/Doc/lib/sqlite3/connect_db_2.py b/Doc/lib/sqlite3/connect_db_2.py new file mode 100644 index 0000000..f9728b36 --- /dev/null +++ b/Doc/lib/sqlite3/connect_db_2.py @@ -0,0 +1,3 @@ +import sqlite3 + +con = sqlite3.connect(":memory:") diff --git a/Doc/lib/sqlite3/converter_point.py b/Doc/lib/sqlite3/converter_point.py new file mode 100644 index 0000000..e220e9b --- /dev/null +++ b/Doc/lib/sqlite3/converter_point.py @@ -0,0 +1,47 @@ +import sqlite3 + +class Point(object): + def __init__(self, x, y): + self.x, self.y = x, y + + def __repr__(self): + return "(%f;%f)" % (self.x, self.y) + +def adapt_point(point): + return "%f;%f" % (point.x, point.y) + +def convert_point(s): + x, y = map(float, s.split(";")) + return Point(x, y) + +# Register the adapter +sqlite3.register_adapter(Point, adapt_point) + +# Register the converter +sqlite3.register_converter("point", convert_point) + +p = Point(4.0, -3.2) + +######################### +# 1) Using declared types +con = sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_DECLTYPES) +cur = con.cursor() +cur.execute("create table test(p point)") + +cur.execute("insert into test(p) values (?)", (p,)) +cur.execute("select p from test") +print "with declared types:", cur.fetchone()[0] +cur.close() +con.close() + +####################### +# 2) Using column names +con = sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_COLNAMES) +cur = 
con.cursor() +cur.execute("create table test(p)") + +cur.execute("insert into test(p) values (?)", (p,)) +cur.execute('select p as "p [point]" from test') +print "with column names:", cur.fetchone()[0] +cur.close() +con.close() diff --git a/Doc/lib/sqlite3/countcursors.py b/Doc/lib/sqlite3/countcursors.py new file mode 100644 index 0000000..df04cad --- /dev/null +++ b/Doc/lib/sqlite3/countcursors.py @@ -0,0 +1,15 @@ +import sqlite3 + +class CountCursorsConnection(sqlite3.Connection): + def __init__(self, *args, **kwargs): + sqlite3.Connection.__init__(self, *args, **kwargs) + self.numcursors = 0 + + def cursor(self, *args, **kwargs): + self.numcursors += 1 + return sqlite3.Connection.cursor(self, *args, **kwargs) + +con = sqlite3.connect(":memory:", factory=CountCursorsConnection) +cur1 = con.cursor() +cur2 = con.cursor() +print con.numcursors diff --git a/Doc/lib/sqlite3/createdb.py b/Doc/lib/sqlite3/createdb.py new file mode 100644 index 0000000..ee2950b --- /dev/null +++ b/Doc/lib/sqlite3/createdb.py @@ -0,0 +1,28 @@ +# Not referenced from the documentation, but builds the database file the other +# code snippets expect. + +import sqlite3 +import os + +DB_FILE = "mydb" + +if os.path.exists(DB_FILE): + os.remove(DB_FILE) + +con = sqlite3.connect(DB_FILE) +cur = con.cursor() +cur.execute(""" + create table people + ( + name_last varchar(20), + age integer + ) + """) + +cur.execute("insert into people (name_last, age) values ('Yeltsin', 72)") +cur.execute("insert into people (name_last, age) values ('Putin', 51)") + +con.commit() + +cur.close() +con.close() diff --git a/Doc/lib/sqlite3/execsql_fetchonerow.py b/Doc/lib/sqlite3/execsql_fetchonerow.py new file mode 100644 index 0000000..8044ecf --- /dev/null +++ b/Doc/lib/sqlite3/execsql_fetchonerow.py @@ -0,0 +1,17 @@ +import sqlite3 + +con = sqlite3.connect("mydb") + +cur = con.cursor() +SELECT = "select name_last, age from people order by age, name_last" + +# 1. 
Iterate over the rows available from the cursor, unpacking the +# resulting sequences to yield their elements (name_last, age): +cur.execute(SELECT) +for (name_last, age) in cur: + print '%s is %d years old.' % (name_last, age) + +# 2. Equivalently: +cur.execute(SELECT) +for row in cur: + print '%s is %d years old.' % (row[0], row[1]) diff --git a/Doc/lib/sqlite3/execsql_printall_1.py b/Doc/lib/sqlite3/execsql_printall_1.py new file mode 100644 index 0000000..d27d735 --- /dev/null +++ b/Doc/lib/sqlite3/execsql_printall_1.py @@ -0,0 +1,13 @@ +import sqlite3 + +# Create a connection to the database file "mydb": +con = sqlite3.connect("mydb") + +# Get a Cursor object that operates in the context of Connection con: +cur = con.cursor() + +# Execute the SELECT statement: +cur.execute("select * from people order by age") + +# Retrieve all rows as a sequence and print that sequence: +print cur.fetchall() diff --git a/Doc/lib/sqlite3/execute_1.py b/Doc/lib/sqlite3/execute_1.py new file mode 100644 index 0000000..fb3784f --- /dev/null +++ b/Doc/lib/sqlite3/execute_1.py @@ -0,0 +1,11 @@ +import sqlite3 + +con = sqlite3.connect("mydb") + +cur = con.cursor() + +who = "Yeltsin" +age = 72 + +cur.execute("select name_last, age from people where name_last=? 
and age=?", (who, age)) +print cur.fetchone() diff --git a/Doc/lib/sqlite3/execute_2.py b/Doc/lib/sqlite3/execute_2.py new file mode 100644 index 0000000..df6c894 --- /dev/null +++ b/Doc/lib/sqlite3/execute_2.py @@ -0,0 +1,12 @@ +import sqlite3 + +con = sqlite3.connect("mydb") + +cur = con.cursor() + +who = "Yeltsin" +age = 72 + +cur.execute("select name_last, age from people where name_last=:who and age=:age", + {"who": who, "age": age}) +print cur.fetchone() diff --git a/Doc/lib/sqlite3/execute_3.py b/Doc/lib/sqlite3/execute_3.py new file mode 100644 index 0000000..b64621f --- /dev/null +++ b/Doc/lib/sqlite3/execute_3.py @@ -0,0 +1,12 @@ +import sqlite3 + +con = sqlite3.connect("mydb") + +cur = con.cursor() + +who = "Yeltsin" +age = 72 + +cur.execute("select name_last, age from people where name_last=:who and age=:age", + locals()) +print cur.fetchone() diff --git a/Doc/lib/sqlite3/executemany_1.py b/Doc/lib/sqlite3/executemany_1.py new file mode 100644 index 0000000..24357c5 --- /dev/null +++ b/Doc/lib/sqlite3/executemany_1.py @@ -0,0 +1,24 @@ +import sqlite3 + +class IterChars: + def __init__(self): + self.count = ord('a') + + def __iter__(self): + return self + + def next(self): + if self.count > ord('z'): + raise StopIteration + self.count += 1 + return (chr(self.count - 1),) # this is a 1-tuple + +con = sqlite3.connect(":memory:") +cur = con.cursor() +cur.execute("create table characters(c)") + +theIter = IterChars() +cur.executemany("insert into characters(c) values (?)", theIter) + +cur.execute("select c from characters") +print cur.fetchall() diff --git a/Doc/lib/sqlite3/executemany_2.py b/Doc/lib/sqlite3/executemany_2.py new file mode 100644 index 0000000..05857c0 --- /dev/null +++ b/Doc/lib/sqlite3/executemany_2.py @@ -0,0 +1,15 @@ +import sqlite3 + +def char_generator(): + import string + for c in string.letters[:26]: + yield (c,) + +con = sqlite3.connect(":memory:") +cur = con.cursor() +cur.execute("create table characters(c)") + 
+cur.executemany("insert into characters(c) values (?)", char_generator()) + +cur.execute("select c from characters") +print cur.fetchall() diff --git a/Doc/lib/sqlite3/executescript.py b/Doc/lib/sqlite3/executescript.py new file mode 100644 index 0000000..0795b47 --- /dev/null +++ b/Doc/lib/sqlite3/executescript.py @@ -0,0 +1,24 @@ +import sqlite3 + +con = sqlite3.connect(":memory:") +cur = con.cursor() +cur.executescript(""" + create table person( + firstname, + lastname, + age + ); + + create table book( + title, + author, + published + ); + + insert into book(title, author, published) + values ( + 'Dirk Gently''s Holistic Detective Agency', + 'Douglas Adams', + 1987 + ); + """) diff --git a/Doc/lib/sqlite3/insert_more_people.py b/Doc/lib/sqlite3/insert_more_people.py new file mode 100644 index 0000000..edbc79e --- /dev/null +++ b/Doc/lib/sqlite3/insert_more_people.py @@ -0,0 +1,16 @@ +import sqlite3 + +con = sqlite3.connect("mydb") + +cur = con.cursor() + +newPeople = ( + ('Lebed' , 53), + ('Zhirinovsky' , 57), + ) + +for person in newPeople: + cur.execute("insert into people (name_last, age) values (?, ?)", person) + +# The changes will not be saved unless the transaction is committed explicitly: +con.commit() diff --git a/Doc/lib/sqlite3/md5func.py b/Doc/lib/sqlite3/md5func.py new file mode 100644 index 0000000..5769687 --- /dev/null +++ b/Doc/lib/sqlite3/md5func.py @@ -0,0 +1,11 @@ +import sqlite3 +import md5 + +def md5sum(t): + return md5.md5(t).hexdigest() + +con = sqlite3.connect(":memory:") +con.create_function("md5", 1, md5sum) +cur = con.cursor() +cur.execute("select md5(?)", ("foo",)) +print cur.fetchone()[0] diff --git a/Doc/lib/sqlite3/mysumaggr.py b/Doc/lib/sqlite3/mysumaggr.py new file mode 100644 index 0000000..6d0cd55 --- /dev/null +++ b/Doc/lib/sqlite3/mysumaggr.py @@ -0,0 +1,20 @@ +import sqlite3 + +class MySum: + def __init__(self): + self.count = 0 + + def step(self, value): + self.count += value + + def finalize(self): + return self.count + 
+con = sqlite3.connect(":memory:") +con.create_aggregate("mysum", 1, MySum) +cur = con.cursor() +cur.execute("create table test(i)") +cur.execute("insert into test(i) values (1)") +cur.execute("insert into test(i) values (2)") +cur.execute("select mysum(i) from test") +print cur.fetchone()[0] diff --git a/Doc/lib/sqlite3/parse_colnames.py b/Doc/lib/sqlite3/parse_colnames.py new file mode 100644 index 0000000..fcded00 --- /dev/null +++ b/Doc/lib/sqlite3/parse_colnames.py @@ -0,0 +1,8 @@ +import sqlite3 +import datetime + +con = sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_COLNAMES) +cur = con.cursor() +cur.execute('select ? as "x [timestamp]"', (datetime.datetime.now(),)) +dt = cur.fetchone()[0] +print dt, type(dt) diff --git a/Doc/lib/sqlite3/pysqlite_datetime.py b/Doc/lib/sqlite3/pysqlite_datetime.py new file mode 100644 index 0000000..efa4b06 --- /dev/null +++ b/Doc/lib/sqlite3/pysqlite_datetime.py @@ -0,0 +1,20 @@ +import sqlite3 +import datetime + +con = sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_DECLTYPES|sqlite3.PARSE_COLNAMES) +cur = con.cursor() +cur.execute("create table test(d date, ts timestamp)") + +today = datetime.date.today() +now = datetime.datetime.now() + +cur.execute("insert into test(d, ts) values (?, ?)", (today, now)) +cur.execute("select d, ts from test") +row = cur.fetchone() +print today, "=>", row[0], type(row[0]) +print now, "=>", row[1], type(row[1]) + +cur.execute('select current_date as "d [date]", current_timestamp as "ts [timestamp]"') +row = cur.fetchone() +print "current_date", row[0], type(row[0]) +print "current_timestamp", row[1], type(row[1]) diff --git a/Doc/lib/sqlite3/row_factory.py b/Doc/lib/sqlite3/row_factory.py new file mode 100644 index 0000000..64676c8 --- /dev/null +++ b/Doc/lib/sqlite3/row_factory.py @@ -0,0 +1,13 @@ +import sqlite3 + +def dict_factory(cursor, row): + d = {} + for idx, col in enumerate(cursor.description): + d[col[0]] = row[idx] + return d + +con = sqlite3.connect(":memory:") 
+con.row_factory = dict_factory +cur = con.cursor() +cur.execute("select 1 as a") +print cur.fetchone()["a"] diff --git a/Doc/lib/sqlite3/rowclass.py b/Doc/lib/sqlite3/rowclass.py new file mode 100644 index 0000000..3fa0b87 --- /dev/null +++ b/Doc/lib/sqlite3/rowclass.py @@ -0,0 +1,12 @@ +import sqlite3 + +con = sqlite3.connect("mydb") +con.row_factory = sqlite3.Row + +cur = con.cursor() +cur.execute("select name_last, age from people") +for row in cur: + assert row[0] == row["name_last"] + assert row["name_last"] == row["nAmE_lAsT"] + assert row[1] == row["age"] + assert row[1] == row["AgE"] diff --git a/Doc/lib/sqlite3/shared_cache.py b/Doc/lib/sqlite3/shared_cache.py new file mode 100644 index 0000000..bf1d7b4 --- /dev/null +++ b/Doc/lib/sqlite3/shared_cache.py @@ -0,0 +1,6 @@ +import sqlite3 + +# The shared cache is only available in SQLite versions 3.3.3 or later. +# See the SQLite documentation for details. + +sqlite3.enable_shared_cache(True) diff --git a/Doc/lib/sqlite3/shortcut_methods.py b/Doc/lib/sqlite3/shortcut_methods.py new file mode 100644 index 0000000..72ed4b3 --- /dev/null +++ b/Doc/lib/sqlite3/shortcut_methods.py @@ -0,0 +1,21 @@ +import sqlite3 + +persons = [ + ("Hugo", "Boss"), + ("Calvin", "Klein") + ] + +con = sqlite3.connect(":memory:") + +# Create the table +con.execute("create table person(firstname, lastname)") + +# Fill the table +con.executemany("insert into person(firstname, lastname) values (?, ?)", persons) + +# Print the table contents +for row in con.execute("select firstname, lastname from person"): + print row + +# Using a dummy WHERE clause to not let SQLite take the shortcut table deletes. 
+print "I just deleted", con.execute("delete from person where 1=1").rowcount, "rows" diff --git a/Doc/lib/sqlite3/simple_tableprinter.py b/Doc/lib/sqlite3/simple_tableprinter.py new file mode 100644 index 0000000..67ea6a2 --- /dev/null +++ b/Doc/lib/sqlite3/simple_tableprinter.py @@ -0,0 +1,26 @@ +import sqlite3 + +FIELD_MAX_WIDTH = 20 +TABLE_NAME = 'people' +SELECT = 'select * from %s order by age, name_last' % TABLE_NAME + +con = sqlite3.connect("mydb") + +cur = con.cursor() +cur.execute(SELECT) + +# Print a header. +for fieldDesc in cur.description: + print fieldDesc[0].ljust(FIELD_MAX_WIDTH) , +print # Finish the header with a newline. +print '-' * 78 + +# For each row, print the value of each field left-justified within +# the maximum possible width of that field. +fieldIndices = range(len(cur.description)) +for row in cur: + for fieldIndex in fieldIndices: + fieldValue = str(row[fieldIndex]) + print fieldValue.ljust(FIELD_MAX_WIDTH) , + + print # Finish the row with a newline. diff --git a/Doc/lib/sqlite3/text_factory.py b/Doc/lib/sqlite3/text_factory.py new file mode 100644 index 0000000..3e157a8 --- /dev/null +++ b/Doc/lib/sqlite3/text_factory.py @@ -0,0 +1,42 @@ +import sqlite3 + +con = sqlite3.connect(":memory:") +cur = con.cursor() + +# Create the table +con.execute("create table person(lastname, firstname)") + +AUSTRIA = u"\xd6sterreich" + +# by default, rows are returned as Unicode +cur.execute("select ?", (AUSTRIA,)) +row = cur.fetchone() +assert row[0] == AUSTRIA + +# but we can make pysqlite always return bytestrings ... +con.text_factory = str +cur.execute("select ?", (AUSTRIA,)) +row = cur.fetchone() +assert type(row[0]) == str +# the bytestrings will be encoded in UTF-8, unless you stored garbage in the +# database ... +assert row[0] == AUSTRIA.encode("utf-8") + +# we can also implement a custom text_factory ... 
+# here we implement one that will ignore Unicode characters that cannot be +# decoded from UTF-8 +con.text_factory = lambda x: unicode(x, "utf-8", "ignore") +cur.execute("select ?", ("this is latin1 and would normally create errors" + u"\xe4\xf6\xfc".encode("latin1"),)) +row = cur.fetchone() +assert type(row[0]) == unicode + +# pysqlite offers a builtin optimized text_factory that will return bytestring +# objects, if the data is in ASCII only, and otherwise return unicode objects +con.text_factory = sqlite3.OptimizedUnicode +cur.execute("select ?", (AUSTRIA,)) +row = cur.fetchone() +assert type(row[0]) == unicode + +cur.execute("select ?", ("Germany",)) +row = cur.fetchone() +assert type(row[0]) == str diff --git a/Doc/mac/scripting.tex b/Doc/mac/scripting.tex index a6d5df7..5ec4978 100644 --- a/Doc/mac/scripting.tex +++ b/Doc/mac/scripting.tex @@ -12,7 +12,7 @@ read Apple's documentation. The "Applescript Language Guide" explains the conceptual model and the terminology, and documents the standard suite. The "Open Scripting Architecture" document explains how to use OSA from an application programmers point of view. In the Apple Help -Viewer these book sare located in the Developer Documentation, Core +Viewer these books are located in the Developer Documentation, Core Technologies section. @@ -49,7 +49,7 @@ line. The generated output is a package with a number of modules, one for every suite used in the program plus an \module{__init__} module to glue it all together. 
The Python inheritance graph follows the AppleScript -inheritance graph, so if a programs dictionary specifies that it +inheritance graph, so if a program's dictionary specifies that it includes support for the Standard Suite, but extends one or two verbs with extra arguments then the output suite will contain a module \module{Standard_Suite} that imports and re-exports everything from diff --git a/Doc/perl/l2hinit.perl b/Doc/perl/l2hinit.perl index 89deed0..7c5d123 100644 --- a/Doc/perl/l2hinit.perl +++ b/Doc/perl/l2hinit.perl @@ -4,7 +4,17 @@ package main; use L2hos; -$HTML_VERSION = 4.0; +$HTML_VERSION = 4.01; +$LOWER_CASE_TAGS = 1; +$NO_FRENCH_QUOTES = 1; + +# '' in \code{...} is still converted, so we can't use this yet. +#$USE_CURLY_QUOTES = 1; + +# Force Unicode support to be loaded; request UTF-8 output. +do_require_extension('unicode'); +do_require_extension('utf8'); +$HTML_OPTIONS = 'utf8'; $MAX_LINK_DEPTH = 2; $ADDRESS = ''; @@ -106,6 +116,13 @@ sub custom_driver_hook { $ENV{'TEXINPUTS'} = undef; } print "\nSetting \$TEXINPUTS to $TEXINPUTS\n"; + + # Not sure why we need to deal with this both here and at the top, + # but this is needed to actually make it work. + do_require_extension('utf8'); + $charset = $utf8_str; + $CHARSET = $utf8_str; + $USE_UTF = 1; } diff --git a/Doc/perl/python.perl b/Doc/perl/python.perl index 437c5cb..ab93c7c 100644 --- a/Doc/perl/python.perl +++ b/Doc/perl/python.perl @@ -530,7 +530,6 @@ sub add_index_entry($$){ sub new_link_name_info(){ my $name = "l2h-" . ++$globals{'max_id'}; - my $aname = "<a id='$name' xml:id='$name'>"; my $ahref = gen_link($CURRENT_FILE, $name); return ($name, $ahref); } diff --git a/Doc/ref/ref2.tex b/Doc/ref/ref2.tex index 34e8a9e..2ed8a5d 100644 --- a/Doc/ref/ref2.tex +++ b/Doc/ref/ref2.tex @@ -308,22 +308,28 @@ identifiers. 
They must be spelled exactly as written here:% \index{reserved word} \begin{verbatim} -and del for is raise -assert elif from lambda return -break else global not try -class except if or while -continue exec import pass yield -def finally in print +and del from not while +as elif global or with +assert else if pass yield +break except import print +class exec in raise +continue finally is return +def for lambda try \end{verbatim} % When adding keywords, use reswords.py for reformatting -Note that although the identifier \code{as} can be used as part of the -syntax of \keyword{import} statements, it is not currently a reserved -word. +\versionchanged[\constant{None} became a constant and is now +recognized by the compiler as a name for the built-in object +\constant{None}. Although it is not a keyword, you cannot assign +a different object to it]{2.4} -In some future version of Python, the identifiers \code{as} and -\code{None} will both become keywords. +\versionchanged[Both \keyword{as} and \keyword{with} are only recognized +when the \code{with_statement} future feature has been enabled. +It will always be enabled in Python 2.6. See section~\ref{with} for +details. Note that using \keyword{as} and \keyword{with} as identifiers +will always issue a warning, even when the \code{with_statement} future +directive is not in effect]{2.5} \subsection{Reserved classes of identifiers\label{id-classes}} @@ -652,7 +658,7 @@ Some examples of floating point literals: \end{verbatim} Note that numeric literals do not include a sign; a phrase like -\code{-1} is actually an expression composed of the operator +\code{-1} is actually an expression composed of the unary operator \code{-} and the literal \code{1}. 
diff --git a/Doc/ref/ref3.tex b/Doc/ref/ref3.tex index 964013f..d0c8ccf 100644 --- a/Doc/ref/ref3.tex +++ b/Doc/ref/ref3.tex @@ -1875,8 +1875,8 @@ These methods are called to implement the binary arithmetic operations (\code{+}, \code{-}, \code{*}, \code{//}, \code{\%}, \function{divmod()}\bifuncindex{divmod}, -\function{pow()}\bifuncindex{pow}, \code{**}, \code{<}\code{<}, -\code{>}\code{>}, \code{\&}, \code{\^}, \code{|}). For instance, to +\function{pow()}\bifuncindex{pow}, \code{**}, \code{<<}, +\code{>>}, \code{\&}, \code{\^}, \code{|}). For instance, to evaluate the expression \var{x}\code{+}\var{y}, where \var{x} is an instance of a class that has an \method{__add__()} method, \code{\var{x}.__add__(\var{y})} is called. The \method{__divmod__()} @@ -1915,8 +1915,8 @@ These methods are called to implement the binary arithmetic operations (\code{+}, \code{-}, \code{*}, \code{/}, \code{\%}, \function{divmod()}\bifuncindex{divmod}, -\function{pow()}\bifuncindex{pow}, \code{**}, \code{<}\code{<}, -\code{>}\code{>}, \code{\&}, \code{\^}, \code{|}) with reflected +\function{pow()}\bifuncindex{pow}, \code{**}, \code{<<}, +\code{>>}, \code{\&}, \code{\^}, \code{|}) with reflected (swapped) operands. These functions are only called if the left operand does not support the corresponding operation. For instance, to evaluate the expression \var{x}\code{-}\var{y}, where \var{y} is an @@ -1942,7 +1942,7 @@ complicated). \methodline[numeric object]{__ior__}{self, other} These methods are called to implement the augmented arithmetic operations (\code{+=}, \code{-=}, \code{*=}, \code{/=}, \code{\%=}, -\code{**=}, \code{<}\code{<=}, \code{>}\code{>=}, \code{\&=}, +\code{**=}, \code{<<=}, \code{>>=}, \code{\&=}, \code{\textasciicircum=}, \code{|=}). These methods should attempt to do the operation in-place (modifying \var{self}) and return the result (which could be, but does not have to be, \var{self}). 
If a specific method @@ -1983,9 +1983,9 @@ Called to implement the built-in functions \end{methoddesc} \begin{methoddesc}[numeric object]{__index__}{self} -Called to implement operator.index(). Also called whenever Python -needs an integer object (such as in slicing). Must return an integer -(int or long). +Called to implement \function{operator.index()}. Also called whenever +Python needs an integer object (such as in slicing). Must return an +integer (int or long). \versionadded{2.5} \end{methoddesc} @@ -2112,49 +2112,41 @@ implement a \method{__coerce__()} method, for use by the built-in \end{itemize} -\subsection{Context Managers and Contexts\label{context-managers}} +\subsection{With Statement Context Managers\label{context-managers}} \versionadded{2.5} -A \dfn{context manager} is an object that manages the entry to, and exit -from, a \dfn{context} surrounding a block of code. Context managers are -normally invoked using the \keyword{with} statement (described in -section~\ref{with}), but can also be used by directly invoking their -methods. +A \dfn{context manager} is an object that defines the runtime +context to be established when executing a \keyword{with} +statement. The context manager handles the entry into, +and the exit from, the desired runtime context for the execution +of the block of code. Context managers are normally invoked using +the \keyword{with} statement (described in section~\ref{with}), but +can also be used by directly invoking their methods. + \stindex{with} \index{context manager} -\index{context} - -Typical uses of context managers include saving and restoring various -kinds of global state, locking and unlocking resources, closing opened -files, etc. - -\begin{methoddesc}[context manager]{__context__}{self} -Invoked when the object is used as the context expression of a -\keyword{with} statement. The return value must implement -\method{__enter__()} and \method{__exit__()} methods. 
Simple context -managers that wish to directly -implement \method{__enter__()} and \method{__exit__()} should just -return \var{self}. - -Context managers written in Python can also implement this method using -a generator function decorated with the -\function{contextlib.contextmanager} decorator, as this can be simpler -than writing individual \method{__enter__()} and \method{__exit__()} -methods when the state to be managed is complex. -\end{methoddesc} -\begin{methoddesc}[context]{__enter__}{self} -Enter the context defined by this object. The \keyword{with} statement -will bind this method's return value to the target(s) specified in the -\keyword{as} clause of the statement, if any. +Typical uses of context managers include saving and +restoring various kinds of global state, locking and unlocking +resources, closing opened files, etc. + +For more information on context managers, see +``\ulink{Context Types}{../lib/typecontextmanager.html}'' in the +\citetitle[../lib/lib.html]{Python Library Reference}. + +\begin{methoddesc}[context manager]{__enter__}{self} +Enter the runtime context related to this object. The \keyword{with} +statement will bind this method's return value to the target(s) +specified in the \keyword{as} clause of the statement, if any. \end{methoddesc} -\begin{methoddesc}[context]{__exit__}{exc_type, exc_value, traceback} -Exit the context defined by this object. The parameters describe the -exception that caused the context to be exited. If the context was -exited without an exception, all three arguments will be -\constant{None}. +\begin{methoddesc}[context manager]{__exit__} +{self, exc_type, exc_value, traceback} +Exit the runtime context related to this object. The parameters +describe the exception that caused the context to be exited. If +the context was exited without an exception, all three arguments +will be \constant{None}. 
If an exception is supplied, and the method wishes to suppress the exception (i.e., prevent it from being propagated), it should return a diff --git a/Doc/ref/ref4.tex b/Doc/ref/ref4.tex index 6a3a4ef..dcdc823 100644 --- a/Doc/ref/ref4.tex +++ b/Doc/ref/ref4.tex @@ -127,7 +127,7 @@ to delete the name. An error will be reported at compile time. If the wild card form of import --- \samp{import *} --- is used in a function and the function contains or is a nested block with free -variables, the compiler will raise a SyntaxError. +variables, the compiler will raise a \exception{SyntaxError}. If \keyword{exec} is used in a function and the function contains or is a nested block with free variables, the compiler will raise a diff --git a/Doc/ref/ref5.tex b/Doc/ref/ref5.tex index 1f2dc5e..89f9977 100644 --- a/Doc/ref/ref5.tex +++ b/Doc/ref/ref5.tex @@ -22,9 +22,9 @@ are the same as for \code{othername}. When a description of an arithmetic operator below uses the phrase ``the numeric arguments are converted to a common type,'' the -arguments are coerced using the coercion rules listed at the end of -chapter \ref{datamodel}. If both arguments are standard numeric -types, the following coercions are applied: +arguments are coerced using the coercion rules listed in +section~\ref{coercion-rules}. If both arguments are standard numeric types, +the following coercions are applied: \begin{itemize} \item If either argument is a complex number, the other is converted @@ -391,7 +391,8 @@ type but a string of exactly one character. A slicing selects a range of items in a sequence object (e.g., a string, tuple or list). Slicings may be used as expressions or as -targets in assignment or del statements. +targets in assignment or \keyword{del} statements. 
The syntax for a +slicing: \obindex{sequence} \obindex{string} \obindex{tuple} @@ -1158,7 +1159,7 @@ have the same precedence and chain from left to right --- see section \hline \lineii{\code{\&}} {Bitwise AND} \hline - \lineii{\code{<}\code{<}, \code{>}\code{>}} {Shifts} + \lineii{\code{<<}, \code{>>}} {Shifts} \hline \lineii{\code{+}, \code{-}}{Addition and subtraction} \hline diff --git a/Doc/ref/ref6.tex b/Doc/ref/ref6.tex index 1eb1258..04db013 100644 --- a/Doc/ref/ref6.tex +++ b/Doc/ref/ref6.tex @@ -377,7 +377,7 @@ right type (but even this is determined by the sliced object). \begin{productionlist} \production{print_stmt} {"print" ( \optional{\token{expression} ("," \token{expression})* \optional{","}}} - \productioncont{| ">\code{>}" \token{expression} + \productioncont{| ">>" \token{expression} \optional{("," \token{expression})+ \optional{","}} )} \end{productionlist} @@ -417,7 +417,7 @@ exception is raised. \keyword{print} also has an extended\index{extended print statement} form, defined by the second portion of the syntax described above. This form is sometimes referred to as ``\keyword{print} chevron.'' -In this form, the first expression after the \code{>}\code{>} must +In this form, the first expression after the \code{>>} must evaluate to a ``file-like'' object, specifically an object that has a \method{write()} method as described above. With this extended form, the subsequent expressions are printed to this file object. If the @@ -809,13 +809,14 @@ import __future__ [as name] That is not a future statement; it's an ordinary import statement with no special semantics or syntax restrictions. -Code compiled by an exec statement or calls to the builtin functions +Code compiled by an \keyword{exec} statement or calls to the builtin functions \function{compile()} and \function{execfile()} that occur in a module \module{M} containing a future statement will, by default, use the new syntax or semantics associated with the future statement. 
This can, starting with Python 2.2 be controlled by optional arguments to -\function{compile()} --- see the documentation of that function in the -library reference for details. +\function{compile()} --- see the documentation of that function in the +\citetitle[../lib/built-in-funcs.html]{Python Library Reference} for +details. A future statement typed at an interactive interpreter prompt will take effect for the rest of the interpreter session. If an diff --git a/Doc/ref/ref7.tex b/Doc/ref/ref7.tex index a2d46a8..6bc0b08 100644 --- a/Doc/ref/ref7.tex +++ b/Doc/ref/ref7.tex @@ -281,11 +281,8 @@ and is not handled, the exception is temporarily saved. The it is re-raised at the end of the \keyword{finally} clause. If the \keyword{finally} clause raises another exception or executes a \keyword{return} or \keyword{break} statement, the saved -exception is lost. A \keyword{continue} statement is illegal in the -\keyword{finally} clause. (The reason is a problem with the current -implementation -- this restriction may be lifted in the future). The -exception information is not available to the program during execution of -the \keyword{finally} clause. +exception is lost. The exception information is not available to the +program during execution of the \keyword{finally} clause. \kwindex{finally} When a \keyword{return}, \keyword{break} or \keyword{continue} statement is @@ -312,38 +309,34 @@ The \keyword{with} statement is used to wrap the execution of a block with methods defined by a context manager (see section~\ref{context-managers}). This allows common \keyword{try}...\keyword{except}...\keyword{finally} usage patterns to -be encapsulated as context managers for convenient reuse. +be encapsulated for convenient reuse. 
\begin{productionlist} \production{with_stmt} - {"with" \token{expression} ["as" target_list] ":" \token{suite}} + {"with" \token{expression} ["as" target] ":" \token{suite}} \end{productionlist} The execution of the \keyword{with} statement proceeds as follows: \begin{enumerate} -\item The expression is evaluated, to obtain a context manager -object. +\item The context expression is evaluated to obtain a context manager. -\item The context manager's \method{__context__()} method is invoked to -obtain a context object. +\item The context manager's \method{__enter__()} method is invoked. -\item The context object's \method{__enter__()} method is invoked. - -\item If a target list was included in the \keyword{with} +\item If a target was included in the \keyword{with} statement, the return value from \method{__enter__()} is assigned to it. \note{The \keyword{with} statement guarantees that if the \method{__enter__()} method returns without an error, then \method{__exit__()} will always be called. Thus, if an error occurs during the assignment to the target list, it will be treated the same as -an error occurring within the suite would be. See step 6 below.} +an error occurring within the suite would be. See step 5 below.} \item The suite is executed. -\item The context object's \method{__exit__()} method is invoked. If an -exception caused the suite to be exited, its type, value, and +\item The context manager's \method{__exit__()} method is invoked. If +an exception caused the suite to be exited, its type, value, and traceback are passed as arguments to \method{__exit__()}. Otherwise, three \constant{None} arguments are supplied. 
diff --git a/Doc/texinputs/python.sty b/Doc/texinputs/python.sty index 4cb02de..3ce62f4 100644 --- a/Doc/texinputs/python.sty +++ b/Doc/texinputs/python.sty @@ -848,8 +848,17 @@ % but only if we actually used hyperref: \ifpdf \newcommand{\url}[1]{{% - \py@pdfstartlink attr{/Border [0 0 0]} user{/S /URI /URI (#1)}% - \py@LinkColor% color of the link text + \py@pdfstartlink% + attr{ /Border [0 0 0] }% + user{% + /Subtype/Link% + /A<<% + /Type/Action% + /S/URI% + /URI(#1)% + >>% + }% + \py@LinkColor% color of the link text \py@smallsize\sf #1% \py@NormalColor% Turn it back off; these are declarative \pdfendlink}% and don't appear bound to the current @@ -925,7 +934,16 @@ \ifpdf \newcommand{\ulink}[2]{{% % For PDF, we *should* only generate a link when the URL is absolute. - \py@pdfstartlink attr{/Border [0 0 0]} user{/S /URI /URI (#2)}% + \py@pdfstartlink% + attr{ /Border [0 0 0] }% + user{% + /Subtype/Link% + /A<<% + /Type/Action% + /S/URI% + /URI(#2)% + >>% + }% \py@LinkColor% color of the link text #1% \py@NormalColor% Turn it back off; these are declarative diff --git a/Doc/tut/glossary.tex b/Doc/tut/glossary.tex index c8082d5..17cc767 100644 --- a/Doc/tut/glossary.tex +++ b/Doc/tut/glossary.tex @@ -7,7 +7,7 @@ \index{>>>} -\item[\code{>\code{>}>}] +\item[\code{>>>}] The typical Python prompt of the interactive shell. Often seen for code examples that can be tried right away in the interpreter. diff --git a/Doc/tut/tut.tex b/Doc/tut/tut.tex index 7f9a7ee..4e0a26b 100644 --- a/Doc/tut/tut.tex +++ b/Doc/tut/tut.tex @@ -264,7 +264,7 @@ the command or module to handle. When commands are read from a tty, the interpreter is said to be in \emph{interactive mode}. In this mode it prompts for the next command with the \emph{primary prompt}, usually three greater-than signs -(\samp{>\code{>}>~}); for continuation lines it prompts with the +(\samp{>>>~}); for continuation lines it prompts with the \emph{secondary prompt}, by default three dots (\samp{...~}). 
The interpreter prints a welcome message stating its version number and a copyright notice before printing the first prompt: @@ -423,7 +423,7 @@ if filename and os.path.isfile(filename): \chapter{An Informal Introduction to Python \label{informal}} In the following examples, input and output are distinguished by the -presence or absence of prompts (\samp{>\code{>}>~} and \samp{...~}): to repeat +presence or absence of prompts (\samp{>>>~} and \samp{...~}): to repeat the example, you must type everything after the prompt, when the prompt appears; lines that do not begin with a prompt are output from the interpreter. % @@ -455,7 +455,7 @@ STRING = "# This is not a comment." \section{Using Python as a Calculator \label{calculator}} Let's try some simple Python commands. Start the interpreter and wait -for the primary prompt, \samp{>\code{>}>~}. (It shouldn't take long.) +for the primary prompt, \samp{>>>~}. (It shouldn't take long.) \subsection{Numbers \label{numbers}} @@ -2723,7 +2723,7 @@ standard module \module{__builtin__}\refbimodindex{__builtin__}: 'FloatingPointError', 'FutureWarning', 'IOError', 'ImportError', 'IndentationError', 'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError', 'None', 'NotImplemented', - 'NotImplementedError', 'OSError', 'OverflowError', 'OverflowWarning', + 'NotImplementedError', 'OSError', 'OverflowError', 'PendingDeprecationWarning', 'ReferenceError', 'RuntimeError', 'RuntimeWarning', 'StandardError', 'StopIteration', 'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'True', @@ -3763,6 +3763,38 @@ for releasing external resources (such as files or network connections), regardless of whether the use of the resource was successful. +\section{Predefined Clean-up Actions \label{cleanup-with}} + +Some objects define standard clean-up actions to be undertaken when +the object is no longer needed, regardless of whether or not the +operation using the object succeeded or failed. 
+Look at the following example, which tries to open a file and print +its contents to the screen. + +\begin{verbatim} +for line in open("myfile.txt"): + print line +\end{verbatim} + +The problem with this code is that it leaves the file open for an +indeterminate amount of time after the code has finished executing. +This is not an issue in simple scripts, but can be a problem for +larger applications. The \keyword{with} statement allows +objects like files to be used in a way that ensures they are +always cleaned up promptly and correctly. + +\begin{verbatim} +with open("myfile.txt") as f: + for line in f: + print line +\end{verbatim} + +After the statement is executed, the file \var{f} is always closed, +even if a problem was encountered while processing the lines. Other +objects which provide predefined clean-up actions will indicate +this in their documentation. + + \chapter{Classes \label{classes}} Python's class mechanism adds classes to the language with a minimum @@ -4757,7 +4789,7 @@ for sending mail: \begin{verbatim} >>> import urllib2 >>> for line in urllib2.urlopen('http://tycho.usno.navy.mil/cgi-bin/timer.pl'): -... if 'EST' in line: # look for Eastern Standard Time +... if 'EST' in line or 'EDT' in line: # look for Eastern Time ... print line <BR>Nov. 25, 09:43:32 PM EST diff --git a/Doc/whatsnew/whatsnew20.tex b/Doc/whatsnew/whatsnew20.tex index bf458fa..56d15b8 100644 --- a/Doc/whatsnew/whatsnew20.tex +++ b/Doc/whatsnew/whatsnew20.tex @@ -330,7 +330,7 @@ List comprehensions have the form: [ expression for expr in sequence1 for expr2 in sequence2 ... for exprN in sequenceN - if condition + if condition ] \end{verbatim} The \keyword{for}...\keyword{in} clauses contain the sequences to be @@ -356,7 +356,7 @@ for expr1 in sequence1: # resulting list. 
\end{verbatim} -This means that when there are \keyword{for}...\keyword{in} clauses, +This means that when there are multiple \keyword{for}...\keyword{in} clauses, the resulting list will be equal to the product of the lengths of all the sequences. If you have two lists of length 3, the output list is 9 elements long: @@ -400,7 +400,7 @@ statement \code{a += 2} increments the value of the variable % The empty groups below prevent conversion to guillemets. The full list of supported assignment operators is \code{+=}, \code{-=}, \code{*=}, \code{/=}, \code{\%=}, \code{**=}, \code{\&=}, -\code{|=}, \verb|^=|, \code{>{}>=}, and \code{<{}<=}. Python classes can +\code{|=}, \verb|^=|, \code{>>=}, and \code{<<=}. Python classes can override the augmented assignment operators by defining methods named \method{__iadd__}, \method{__isub__}, etc. For example, the following \class{Number} class stores a number and supports using += to create a diff --git a/Doc/whatsnew/whatsnew23.tex b/Doc/whatsnew/whatsnew23.tex index e29ecdd..a122083 100644 --- a/Doc/whatsnew/whatsnew23.tex +++ b/Doc/whatsnew/whatsnew23.tex @@ -318,7 +318,7 @@ Hisao and Martin von~L\"owis.} %====================================================================== -\section{PEP 273: Importing Modules from Zip Archives} +\section{PEP 273: Importing Modules from ZIP Archives} The new \module{zipimport} module adds support for importing modules from a ZIP-format archive. You don't need to import the diff --git a/Doc/whatsnew/whatsnew25.tex b/Doc/whatsnew/whatsnew25.tex index 750162f..3006624 100644 --- a/Doc/whatsnew/whatsnew25.tex +++ b/Doc/whatsnew/whatsnew25.tex @@ -2,13 +2,11 @@ \usepackage{distutils} % $Id$ -% The easy_install stuff -% Describe the pkgutil module % Fix XXX comments % Count up the patches and bugs \title{What's New in Python 2.5} -\release{0.1} +\release{0.2} \author{A.M. Kuchling} \authoraddress{\email{amk@amk.ca}} @@ -34,32 +32,6 @@ rationale, refer to the PEP for a particular new feature. 
%====================================================================== -\section{PEP 243: Uploading Modules to PyPI\label{pep-243}} - -PEP 243 describes an HTTP-based protocol for submitting software -packages to a central archive. The Python package index at -\url{http://cheeseshop.python.org} now supports package uploads, and -the new \command{upload} Distutils command will upload a package to the -repository. - -Before a package can be uploaded, you must be able to build a -distribution using the \command{sdist} Distutils command. Once that -works, you can run \code{python setup.py upload} to add your package -to the PyPI archive. Optionally you can GPG-sign the package by -supplying the \longprogramopt{sign} and -\longprogramopt{identity} options. - -\begin{seealso} - -\seepep{243}{Module Repository Upload Mechanism}{PEP written by -Sean Reifschneider; implemented by Martin von~L\"owis -and Richard Jones. Note that the PEP doesn't exactly -describe what's implemented in PyPI.} - -\end{seealso} - - -%====================================================================== \section{PEP 308: Conditional Expressions\label{pep-308}} For a long time, people have been requesting a way to write @@ -236,6 +208,20 @@ setup(name='PyPackage', % VERSION), ) \end{verbatim} + +Another new enhancement to the Python package index at +\url{http://cheeseshop.python.org} is storing source and binary +archives for a package. The new \command{upload} Distutils command +will upload a package to the repository. + +Before a package can be uploaded, you must be able to build a +distribution using the \command{sdist} Distutils command. Once that +works, you can run \code{python setup.py upload} to add your package +to the PyPI archive. Optionally you can GPG-sign the package by +supplying the \longprogramopt{sign} and +\longprogramopt{identity} options. + +Package uploading was implemented by Martin von~L\"owis and Richard Jones. 
\begin{seealso} @@ -394,13 +380,17 @@ finally: \end{verbatim} The code in \var{block-1} is executed. If the code raises an -exception, the handlers are tried in order: \var{handler-1}, -\var{handler-2}, ... If no exception is raised, the \var{else-block} -is executed. No matter what happened previously, the -\var{final-block} is executed once the code block is complete and any -raised exceptions handled. Even if there's an error in an exception -handler or the \var{else-block} and a new exception is raised, the -\var{final-block} is still executed. +exception, the various \keyword{except} blocks are tested: if the +exception is of class \class{Exception1}, \var{handler-1} is executed; +otherwise if it's of class \class{Exception2}, \var{handler-2} is +executed, and so forth. If no exception is raised, the +\var{else-block} is executed. + +No matter what happened previously, the \var{final-block} is executed +once the code block is complete and any raised exceptions handled. +Even if there's an error in an exception handler or the +\var{else-block} and a new exception is raised, the +code in the \var{final-block} is still run. \begin{seealso} @@ -415,7 +405,7 @@ implementation by Thomas Lee.} Python 2.5 adds a simple way to pass values \emph{into} a generator. As introduced in Python 2.3, generators only produce output; once a -generator's code is invoked to create an iterator, there's no way to +generator's code was invoked to create an iterator, there was no way to pass any new information into the function when its execution is resumed. Sometimes the ability to pass in some information would be useful. Hackish solutions to this include making the generator's code @@ -522,9 +512,9 @@ generators: \exception{GeneratorExit} or \exception{StopIteration}; catching the exception and doing anything else is illegal and will trigger a \exception{RuntimeError}. \method{close()} will also be called by - Python's garbage collection when the generator is garbage-collected. 
+ Python's garbage collector when the generator is garbage-collected. - If you need to run cleanup code in case of a \exception{GeneratorExit}, + If you need to run cleanup code when a \exception{GeneratorExit} occurs, I suggest using a \code{try: ... finally:} suite instead of catching \exception{GeneratorExit}. @@ -535,8 +525,8 @@ one-way producers of information into both producers and consumers. Generators also become \emph{coroutines}, a more generalized form of subroutines. Subroutines are entered at one point and exited at -another point (the top of the function, and a \keyword{return -statement}), but coroutines can be entered, exited, and resumed at +another point (the top of the function, and a \keyword{return} +statement), but coroutines can be entered, exited, and resumed at many different points (the \keyword{yield} statements). We'll have to figure out patterns for using coroutines effectively in Python. @@ -579,14 +569,12 @@ Sugalski.} %====================================================================== \section{PEP 343: The 'with' statement\label{pep-343}} -The '\keyword{with}' statement allows a clearer version of code that -uses \code{try...finally} blocks to ensure that clean-up code is -executed. - -In this section, I'll discuss the statement as it will commonly be -used. In the next section, I'll examine the implementation details -and show how to write objects called ``context managers'' and -``contexts'' for use with this statement. +The '\keyword{with}' statement clarifies code that previously would +use \code{try...finally} blocks to ensure that clean-up code is +executed. In this section, I'll discuss the statement as it will +commonly be used. In the next section, I'll examine the +implementation details and show how to write objects for use with this +statement. 
The '\keyword{with}' statement is a new control-flow structure whose basic structure is: @@ -596,13 +584,13 @@ with expression [as variable]: with-block \end{verbatim} -The expression is evaluated, and it should result in a type of object -that's called a context manager. The context manager can return a +The expression is evaluated, and it should result in an object that +supports the context management protocol. This object may return a value that can optionally be bound to the name \var{variable}. (Note -carefully: \var{variable} is \emph{not} assigned the result of -\var{expression}.) One method of the context manager is run before -\var{with-block} is executed, and another method is run after the -block is done, even if the block raised an exception. +carefully that \var{variable} is \emph{not} assigned the result of +\var{expression}.) The object can then run set-up code +before \var{with-block} is executed and some clean-up code +is executed after the block is done, even if the block raised an exception. To enable the statement in Python 2.5, you need to add the following directive to your module: @@ -613,7 +601,8 @@ from __future__ import with_statement The statement will always be enabled in Python 2.6. -Some standard Python objects can now behave as context managers. File +Some standard Python objects now support the context management +protocol and can be used with the '\keyword{with}' statement. File objects are one example: \begin{verbatim} @@ -637,12 +626,12 @@ with lock: ... \end{verbatim} -The lock is acquired before the block is executed, and always released once +The lock is acquired before the block is executed and always released once the block is complete. The \module{decimal} module's contexts, which encapsulate the desired -precision and rounding characteristics for computations, can also be -used as context managers. 
+precision and rounding characteristics for computations, provide a +\method{context_manager()} method for getting a context manager: \begin{verbatim} import decimal @@ -651,7 +640,8 @@ import decimal v1 = decimal.Decimal('578') print v1.sqrt() -with decimal.Context(prec=16): +ctx = decimal.Context(prec=16) +with ctx.context_manager(): # All code in this block uses a precision of 16 digits. # The original context is restored on exiting the block. print v1.sqrt() @@ -660,47 +650,45 @@ with decimal.Context(prec=16): \subsection{Writing Context Managers\label{context-managers}} Under the hood, the '\keyword{with}' statement is fairly complicated. -Most people will only use '\keyword{with}' in company with -existing objects that are documented to work as context managers, and -don't need to know these details, so you can skip the following section if -you like. Authors of new context managers will need to understand the -details of the underlying implementation. +Most people will only use '\keyword{with}' in company with existing +objects and don't need to know these details, so you can skip the rest +of this section if you like. Authors of new objects will need to +understand the details of the underlying implementation and should +keep reading. A high-level explanation of the context management protocol is: \begin{itemize} -\item The expression is evaluated and should result in an object -that's a context manager, meaning that it has a -\method{__context__()} method. -\item This object's \method{__context__()} method is called, and must -return a context object. +\item The expression is evaluated and should result in an object +called a ``context manager''. The context manager must have +\method{__enter__()} and \method{__exit__()} methods. -\item The context's \method{__enter__()} method is called. -The value returned is assigned to \var{VAR}. If no \code{'as \var{VAR}'} -clause is present, the value is simply discarded. 
+\item The context manager's \method{__enter__()} method is called. The value +returned is assigned to \var{VAR}. If no \code{'as \var{VAR}'} clause +is present, the value is simply discarded. \item The code in \var{BLOCK} is executed. -\item If \var{BLOCK} raises an exception, the context object's +\item If \var{BLOCK} raises an exception, the \method{__exit__(\var{type}, \var{value}, \var{traceback})} is called -with the exception's information, the same values returned by -\function{sys.exc_info()}. The method's return value -controls whether the exception is re-raised: any false value -re-raises the exception, and \code{True} will result in suppressing it. -You'll only rarely want to suppress the exception; the -author of the code containing the '\keyword{with}' statement will -never realize anything went wrong. +with the exception details, the same values returned by +\function{sys.exc_info()}. The method's return value controls whether +the exception is re-raised: any false value re-raises the exception, +and \code{True} will result in suppressing it. You'll only rarely +want to suppress the exception, because if you do +the author of the code containing the +'\keyword{with}' statement will never realize anything went wrong. \item If \var{BLOCK} didn't raise an exception, -the context object's \method{__exit__()} is still called, +the \method{__exit__()} method is still called, but \var{type}, \var{value}, and \var{traceback} are all \code{None}. \end{itemize} Let's think through an example. I won't present detailed code but -will only sketch the necessary code. The example will be writing a -context manager for a database that supports transactions. +will only sketch the methods necessary for a database that supports +transactions. (For people unfamiliar with database terminology: a set of changes to the database are grouped into a transaction. Transactions can be @@ -721,22 +709,13 @@ with db_connection as cursor: # ... more operations ... 
\end{verbatim} -The transaction should either be committed if the code in the block -runs flawlessly, or rolled back if there's an exception. - -First, the \class{DatabaseConnection} needs a \method{__context__()} -method. Sometimes an object can be its own context manager and can -simply return \code{self}; the \module{threading} module's lock objects -can do this. For our database example, though, we need to -create a new object; I'll call this class \class{DatabaseContext}. -Our \method{__context__()} must therefore look like this: +The transaction should be committed if the code in the block +runs flawlessly or rolled back if there's an exception. +Here's the basic interface +for \class{DatabaseConnection} that I'll assume: \begin{verbatim} class DatabaseConnection: - ... - def __context__ (self): - return DatabaseContext(self) - # Database interface def cursor (self): "Returns a cursor object and starts a new transaction" @@ -746,29 +725,18 @@ class DatabaseConnection: "Rolls back current transaction" \end{verbatim} -The context needs the connection object so that the connection -object's \method{commit()} or \method{rollback()} methods can be -called: +The \method {__enter__()} method is pretty easy, having only to start +a new transaction. For this application the resulting cursor object +would be a useful result, so the method will return it. The user can +then add \code{as cursor} to their '\keyword{with}' statement to bind +the cursor to a variable name. \begin{verbatim} -class DatabaseContext: - def __init__ (self, connection): - self.connection = connection -\end{verbatim} - -The \method {__enter__()} method is pretty easy, having only -to start a new transaction. In this example, -the resulting cursor object would be a useful result, -so the method will return it. The user can -then add \code{as cursor} to their '\keyword{with}' statement -to bind the cursor to a variable name. - -\begin{verbatim} -class DatabaseContext: +class DatabaseConnection: ... 
def __enter__ (self): # Code to start a new transaction - cursor = self.connection.cursor() + cursor = self.cursor() return cursor \end{verbatim} @@ -776,21 +744,23 @@ The \method{__exit__()} method is the most complicated because it's where most of the work has to be done. The method has to check if an exception occurred. If there was no exception, the transaction is committed. The transaction is rolled back if there was an exception. -Here the code will just fall off the end of the function, returning -the default value of \code{None}. \code{None} is false, so the exception -will be re-raised automatically. If you wished, you could be more explicit -and add a \keyword{return} at the marked location. + +In the code below, execution will just fall off the end of the +function, returning the default value of \code{None}. \code{None} is +false, so the exception will be re-raised automatically. If you +wished, you could be more explicit and add a \keyword{return} +statement at the marked location. \begin{verbatim} -class DatabaseContext: +class DatabaseConnection: ... def __exit__ (self, type, value, tb): if tb is None: # No exception, so commit - self.connection.commit() + self.commit() else: # Exception occurred, so rollback. - self.connection.rollback() + self.rollback() # return False \end{verbatim} @@ -798,25 +768,26 @@ class DatabaseContext: \subsection{The contextlib module\label{module-contextlib}} The new \module{contextlib} module provides some functions and a -decorator that are useful for writing context managers. - -The decorator is called \function{contextmanager}, and lets you write -a simple context manager as a generator. The generator should yield -exactly one value. The code up to the \keyword{yield} will be -executed as the \method{__enter__()} method, and the value yielded -will be the method's return value that will get bound to the variable -in the '\keyword{with}' statement's \keyword{as} clause, if any. 
The -code after the \keyword{yield} will be executed in the -\method{__exit__()} method. Any exception raised in the block -will be raised by the \keyword{yield} statement. +decorator that are useful for writing objects for use with the +'\keyword{with}' statement. + +The decorator is called \function{contextfactory}, and lets you write +a single generator function instead of defining a new class. The generator +should yield exactly one value. The code up to the \keyword{yield} +will be executed as the \method{__enter__()} method, and the value +yielded will be the method's return value that will get bound to the +variable in the '\keyword{with}' statement's \keyword{as} clause, if +any. The code after the \keyword{yield} will be executed in the +\method{__exit__()} method. Any exception raised in the block will be +raised by the \keyword{yield} statement. Our database example from the previous section could be written using this decorator as: \begin{verbatim} -from contextlib import contextmanager +from contextlib import contextfactory -@contextmanager +@contextfactory def db_transaction (connection): cursor = connection.cursor() try: @@ -832,29 +803,11 @@ with db_transaction(db) as cursor: ... \end{verbatim} -You can also use this decorator to write the \method{__context__()} method -for a class without creating a new class for the context: - -\begin{verbatim} -class DatabaseConnection: - - @contextmanager - def __context__ (self): - cursor = self.cursor() - try: - yield cursor - except: - self.rollback() - raise - else: - self.commit() -\end{verbatim} - - -There's a \function{nested(\var{mgr1}, \var{mgr2}, ...)} manager that -combines a number of context managers so you don't need to write -nested '\keyword{with}' statements. 
This example statement does two -things, starting a database transaction and acquiring a thread lock: +The \module{contextlib} module also has a \function{nested(\var{mgr1}, +\var{mgr2}, ...)} function that combines a number of context managers so you +don't need to write nested '\keyword{with}' statements. In this +example, the single '\keyword{with}' statement both starts a database +transaction and acquires a thread lock: \begin{verbatim} lock = threading.Lock() @@ -862,7 +815,7 @@ with nested (db_transaction(db), lock) as (cursor, locked): ... \end{verbatim} -Finally, the \function{closing(\var{object})} context manager +Finally, the \function{closing(\var{object})} function returns \var{object} so that it can be bound to a variable, and calls \code{\var{object}.close()} at the end of the block. @@ -880,8 +833,7 @@ with closing(urllib.urlopen('http://www.yahoo.com')) as f: \seepep{343}{The ``with'' statement}{PEP written by Guido van~Rossum and Nick Coghlan; implemented by Mike Bland, Guido van~Rossum, and Neal Norwitz. The PEP shows the code generated for a '\keyword{with}' -statement, which can be helpful in learning how context managers -work.} +statement, which can be helpful in learning how the statement works.} \seeurl{../lib/module-contextlib.html}{The documentation for the \module{contextlib} module.} @@ -1064,7 +1016,7 @@ and implemented by Travis Oliphant.} %====================================================================== -\section{Other Language Changes} +\section{Other Language Changes\label{other-lang}} Here are all of the changes that Python 2.5 makes to the core Python language. @@ -1090,6 +1042,36 @@ print d[1], d[2] # Prints 1, 2 print d[3], d[4] # Prints 0, 0 \end{verbatim} +\item Both 8-bit and Unicode strings have new \method{partition(sep)} +and \method{rpartition(sep)} methods that simplify a common use case. 
+The \method{find(S)} method is often used to get an index which is +then used to slice the string and obtain the pieces that are before +and after the separator. + +\method{partition(sep)} condenses this +pattern into a single method call that returns a 3-tuple containing +the substring before the separator, the separator itself, and the +substring after the separator. If the separator isn't found, the +first element of the tuple is the entire string and the other two +elements are empty. \method{rpartition(sep)} also returns a 3-tuple +but starts searching from the end of the string; the \samp{r} stands +for 'reverse'. + +Some examples: + +\begin{verbatim} +>>> ('http://www.python.org').partition('://') +('http', '://', 'www.python.org') +>>> (u'Subject: a quick question').partition(':') +(u'Subject', u':', u' a quick question') +>>> ('file:/usr/share/doc/index.html').partition('://') +('file:/usr/share/doc/index.html', '', '') +>>> 'www.python.org'.rpartition('.') +('www.python', '.', 'org') +\end{verbatim} + +(Implemented by Fredrik Lundh following a suggestion by Raymond Hettinger.) + \item The \function{min()} and \function{max()} built-in functions gained a \code{key} keyword parameter analogous to the \code{key} argument for \method{sort()}. This parameter supplies a function that @@ -1127,6 +1109,14 @@ a line like this near the top of the source file: # -*- coding: latin1 -*- \end{verbatim} +\item One error that Python programmers sometimes make is forgetting +to include an \file{__init__.py} module in a package directory. +Debugging this mistake can be confusing, and usually requires running +Python with the \programopt{-v} switch to log all the paths searched. +In Python 2.5, a new \exception{ImportWarning} warning is raised when +an import would have picked up a directory as a package but no +\file{__init__.py} was found. (Implemented by Thomas Wouters.) + \item The list of base classes in a class definition can now be empty. 
As an example, this is now legal: @@ -1140,7 +1130,7 @@ class C(): %====================================================================== -\subsection{Interactive Interpreter Changes} +\subsection{Interactive Interpreter Changes\label{interactive}} In the interactive interpreter, \code{quit} and \code{exit} have long been strings so that new users get a somewhat helpful message @@ -1158,7 +1148,14 @@ interpreter as they expect. (Implemented by Georg Brandl.) %====================================================================== -\subsection{Optimizations} +\subsection{Optimizations\label{opts}} + +Several of the optimizations were developed at the NeedForSpeed +sprint, an event held in Reykjavik, Iceland, from May 21--28 2006. +The sprint focused on speed enhancements to the CPython implementation +and was funded by EWT LLC with local support from CCP Games. Those +optimizations added at this sprint are specially marked in the +following list. \begin{itemize} @@ -1169,15 +1166,53 @@ In 2.5 the internal data structure has been customized for implementing sets, and as a result sets will use a third less memory and are somewhat faster. (Implemented by Raymond Hettinger.) -\item The performance of some Unicode operations, such as -character map decoding, has been improved. +\item The speed of some Unicode operations, such as +finding substrings, string splitting, and character map decoding, has +been improved. (Substring search and splitting improvements were +added by Fredrik Lundh and Andrew Dalke at the NeedForSpeed +sprint. Character map decoding was improved by Walter D\"orwald.) % Patch 1313939 +\item The \function{long(\var{str}, \var{base})} function is now +faster on long digit strings because fewer intermediate results are +calculated. The peak is for strings of around 800--1000 digits where +the function is 6 times faster. +(Contributed by Alan McIntyre and committed at the NeedForSpeed sprint.) 
+% Patch 1442927 + +\item The \module{struct} module now compiles structure format +strings into an internal representation and caches this +representation, yielding a 20\% speedup. (Contributed by Bob Ippolito +at the NeedForSpeed sprint.) + \item The code generator's peephole optimizer now performs simple constant folding in expressions. If you write something like \code{a = 2+3}, the code generator will do the arithmetic and produce code corresponding to \code{a = 5}. +\item Function calls are now faster because code objects now keep +the most recently finished frame (a ``zombie frame'') in an internal +field of the code object, reusing it the next time the code object is +invoked. (Original patch by Michael Hudson, modified by Armin Rigo +and Richard Jones; committed at the NeedForSpeed sprint.) +% Patch 876206 + +Frame objects are also slightly smaller, which may improve cache locality +and reduce memory usage a bit. (Contributed by Neal Norwitz.) +% Patch 1337051 + +\item Python's built-in exceptions are now new-style classes, a change +that speeds up instantiation considerably. Exception handling in +Python 2.5 is therefore about 30\% faster than in 2.4. +(Contributed by Richard Jones, Georg Brandl and Sean Reifschneider at +the NeedForSpeed sprint.) + +\item Importing now caches the paths tried, recording whether +they exist or not so that the interpreter makes fewer +\cfunction{open()} and \cfunction{stat()} calls on startup. +(Contributed by Martin von~L\"owis and Georg Brandl.) +% Patch 921466 + \end{itemize} The net result of the 2.5 optimizations is that Python 2.5 runs the @@ -1185,7 +1220,7 @@ pystone benchmark around XXX\% faster than Python 2.4. %====================================================================== -\section{New, Improved, and Removed Modules} +\section{New, Improved, and Removed Modules\label{modules}} The standard library received many enhancements and bug fixes in Python 2.5. 
Here's a partial list of the most notable changes, sorted @@ -1255,7 +1290,6 @@ raising \exception{ValueError} if the value isn't found. \item New module: The \module{contextlib} module contains helper functions for use with the new '\keyword{with}' statement. See section~\ref{module-contextlib} for more about this module. -(Contributed by Phillip J. Eby.) \item New module: The \module{cProfile} module is a C implementation of the existing \module{profile} module that has much lower overhead. @@ -1266,8 +1300,8 @@ which is also written in C but doesn't match the \module{profile} module's interface, will continue to be maintained in future versions of Python. (Contributed by Armin Rigo.) -Also, the \module{pstats} module used to analyze the data measured by -the profiler now supports directing the output to any file stream +Also, the \module{pstats} module for analyzing the data measured by +the profiler now supports directing the output to any file object by supplying a \var{stream} argument to the \class{Stats} constructor. (Contributed by Skip Montanaro.) @@ -1295,6 +1329,11 @@ ts = datetime.strptime('10:13:15 2006-03-07', '%H:%M:%S %Y-%m-%d') \end{verbatim} +\item The \module{doctest} module gained a \code{SKIP} option that +keeps an example from being executed at all. This is intended for +code snippets that are usage examples meant for the reader and +aren't actually test cases. + \item The \module{fileinput} module was made more flexible. Unicode filenames are now supported, and a \var{mode} parameter that defaults to \code{"r"} was added to the @@ -1344,6 +1383,35 @@ itertools.islice(iterable, s.start, s.stop, s.step) (Contributed by Raymond Hettinger.) +\item The \module{mailbox} module underwent a massive rewrite to add +the capability to modify mailboxes in addition to reading them.
A new +set of classes that include \class{mbox}, \class{MH}, and +\class{Maildir} are used to read mailboxes, and have an +\method{add(\var{message})} method to add messages, +\method{remove(\var{key})} to remove messages, and +\method{lock()}/\method{unlock()} to lock/unlock the mailbox. The +following example converts a maildir-format mailbox into an mbox-format one: + +\begin{verbatim} +import mailbox + +# 'factory=None' uses email.Message.Message as the class representing +# individual messages. +src = mailbox.Maildir('maildir', factory=None) +dest = mailbox.mbox('/tmp/mbox') + +for msg in src: + dest.add(msg) +\end{verbatim} + +(Contributed by Gregory K. Johnson. Funding was provided by Google's +2005 Summer of Code.) + +\item New module: the \module{msilib} module allows creating +Microsoft Installer \file{.msi} files and CAB files. Some support +for reading the \file{.msi} database is also included. +(Contributed by Martin von~L\"owis.) + \item The \module{nis} module now supports accessing domains other than the system default domain by supplying a \var{domain} argument to the \function{nis.match()} and \function{nis.maps()} functions. @@ -1358,6 +1426,11 @@ this new feature with the \method{sort()} method's \code{key} parameter lets you easily sort lists using multiple fields. (Contributed by Raymond Hettinger.) +\item The \module{optparse} module was updated to version 1.5.1 of the +Optik library. The \class{OptionParser} class gained an +\member{epilog} attribute, a string that will be printed after the +help message, and a \method{destroy()} method to break reference +cycles created by the object. (Contributed by Greg Ward.) \item The \module{os} module underwent several changes. The \member{stat_float_times} variable now defaults to true, meaning that @@ -1389,12 +1462,35 @@ The \member{st_flags} member is also available, if the platform supports it. (Contributed by Antti Louko and Diego Petten\`o.) 
% (Patch 1180695, 1212117) +\item The Python debugger provided by the \module{pdb} module +can now store lists of commands to execute when a breakpoint is +reached and execution stops. Once breakpoint \#1 has been created, +enter \samp{commands 1} and enter a series of commands to be executed, +finishing the list with \samp{end}. The command list can include +commands that resume execution, such as \samp{continue} or +\samp{next}. (Contributed by Gr\'egoire Dooms.) +% Patch 790710 + \item The \module{pickle} and \module{cPickle} modules no longer accept a return value of \code{None} from the \method{__reduce__()} method; the method must return a tuple of arguments instead. The ability to return \code{None} was deprecated in Python 2.4, so this completes the removal of the feature. +\item The \module{pkgutil} module, containing various utility +functions for finding packages, was enhanced to support PEP 302's +import hooks and now also works for packages stored in ZIP-format archives. +(Contributed by Phillip J. Eby.) + +\item The pybench benchmark suite by Marc-Andr\'e~Lemburg is now +included in the \file{Tools/pybench} directory. The pybench suite is +an improvement on the commonly used \file{pystone.py} program because +pybench provides a more detailed measurement of the interpreter's +speed. It times particular operations such as function calls, +tuple slicing, method lookups, and numeric operations, instead of +performing many different operations and reducing the result to a +single number as \file{pystone.py} does. + \item The old \module{regex} and \module{regsub} modules, which have been deprecated ever since Python 2.0, have finally been deleted. Other deleted modules: \module{statcache}, \module{tzparse}, @@ -1406,6 +1502,12 @@ which includes ancient modules such as \module{dircmp} and \code{sys.path}, so unless your programs explicitly added the directory to \code{sys.path}, this removal shouldn't affect your code. 
+\item The \module{rlcompleter} module is no longer +dependent on importing the \module{readline} module and +therefore now works on non-{\UNIX} platforms. +(Patch from Robert Kiendl.) +% Patch #1472854 + \item The \module{socket} module now supports \constant{AF_NETLINK} sockets on Linux, thanks to a patch from Philippe Biondi. Netlink sockets are a Linux-specific mechanism for communications @@ -1414,20 +1516,52 @@ article about them is at \url{http://www.linuxjournal.com/article/7356}. In Python code, netlink addresses are represented as a tuple of 2 integers, \code{(\var{pid}, \var{group_mask})}. -Socket objects also gained accessor methods \method{getfamily()}, -\method{gettype()}, and \method{getproto()} methods to retrieve the -family, type, and protocol values for the socket. +Two new methods on socket objects, \method{recv_buf(\var{buffer})} and +\method{recvfrom_buf(\var{buffer})}, store the received data in an object +that supports the buffer protocol instead of returning the data as a +string. This means you can put the data directly into an array or a +memory-mapped file. + +Socket objects also gained \method{getfamily()}, \method{gettype()}, +and \method{getproto()} accessor methods to retrieve the family, type, +and protocol values for the socket. \item New module: the \module{spwd} module provides functions for accessing the shadow password database on systems that support shadow passwords. +\item The \module{struct} module is now faster because it +compiles format strings into \class{Struct} objects +with \method{pack()} and \method{unpack()} methods. This is similar +to how the \module{re} module lets you create compiled regular +expression objects. You can still use the module-level +\function{pack()} and \function{unpack()} functions; they'll create +\class{Struct} objects and cache them. 
Or you can use +\class{Struct} instances directly: + +\begin{verbatim} +s = struct.Struct('ih3s') + +data = s.pack(1972, 187, 'abc') +year, number, name = s.unpack(data) +\end{verbatim} + +You can also pack and unpack data to and from buffer objects directly +using the \method{pack_to(\var{buffer}, \var{offset}, \var{v1}, +\var{v2}, ...)} and \method{unpack_from(\var{buffer}, \var{offset})} +methods. This lets you store data directly into an array or a +memory-mapped file. + +(\class{Struct} objects were implemented by Bob Ippolito at the +NeedForSpeed sprint. Support for buffer objects was added by Martin +Blais, also at the NeedForSpeed sprint.) + \item The Python developers switched from CVS to Subversion during the 2.5 -development process. Information about the exact build version is -available as the \code{sys.subversion} variable, a 3-tuple -of \code{(\var{interpreter-name}, \var{branch-name}, \var{revision-range})}. -For example, at the time of writing -my copy of 2.5 was reporting \code{('CPython', 'trunk', '45313:45315')}. +development process. Information about the exact build version is +available as the \code{sys.subversion} variable, a 3-tuple of +\code{(\var{interpreter-name}, \var{branch-name}, +\var{revision-range})}. For example, at the time of writing my copy +of 2.5 was reporting \code{('CPython', 'trunk', '45313:45315')}. This information is also available to C extensions via the \cfunction{Py_GetBuildInfo()} function that returns a @@ -1449,7 +1583,7 @@ using the mode \code{'r|*'}. \item The \module{unicodedata} module has been updated to use version 4.1.0 of the Unicode character database. Version 3.2.0 is required by some specifications, so it's still available as -\member{unicodedata.db_3_2_0}. +\member{unicodedata.ucd_3_2_0}. \item The \module{webbrowser} module received a number of enhancements. @@ -1474,13 +1608,19 @@ Brandl.) (Contributed by Skip Montanaro.) 
% Patch 1120353 +\item The \module{zlib} module's \class{Compress} and \class{Decompress} +objects now support a \method{copy()} method that makes a copy of the +object's internal state and returns a new +\class{Compress} or \class{Decompress} object. +(Contributed by Chris AtLee.) +% Patch 1435422 \end{itemize} %====================================================================== -\subsection{The ctypes package} +\subsection{The ctypes package\label{module-ctypes}} The \module{ctypes} package, written by Thomas Heller, has been added to the standard library. \module{ctypes} lets you call arbitrary functions @@ -1562,10 +1702,10 @@ of extension modules, now that \module{ctypes} is included with core Python. %====================================================================== -\subsection{The ElementTree package} +\subsection{The ElementTree package\label{module-etree}} A subset of Fredrik Lundh's ElementTree library for processing XML has -been added to the standard library as \module{xmlcore.etree}. The +been added to the standard library as \module{xml.etree}. The available modules are \module{ElementTree}, \module{ElementPath}, and \module{ElementInclude} from ElementTree 1.2.6. @@ -1587,7 +1727,7 @@ takes either a string (assumed to contain a filename) or a file-like object and returns an \class{ElementTree} instance: \begin{verbatim} -from xmlcore.etree import ElementTree as ET +from xml.etree import ElementTree as ET tree = ET.parse('ex-1.xml') @@ -1605,7 +1745,7 @@ This function provides a tidy way to incorporate XML fragments, approaching the convenience of an XML literal: \begin{verbatim} -svg = et.XML("""<svg width="10px" version="1.0"> +svg = ET.XML("""<svg width="10px" version="1.0"> </svg>""") svg.set('height', '320px') svg.append(elem1) @@ -1619,7 +1759,7 @@ values, and list-like operations are used to access child nodes. 
\lineii{elem[n]}{Returns n'th child element.} \lineii{elem[m:n]}{Returns list of m'th through n'th child elements.} \lineii{len(elem)}{Returns number of child elements.} - \lineii{elem.getchildren()}{Returns list of child elements.} + \lineii{list(elem)}{Returns list of child elements.} \lineii{elem.append(elem2)}{Adds \var{elem2} as a child.} \lineii{elem.insert(index, elem2)}{Inserts \var{elem2} at the specified location.} \lineii{del elem[n]}{Deletes n'th child element.} @@ -1651,14 +1791,15 @@ tree.write('output.xml') # Encoding is UTF-8 f = open('output.xml', 'w') -tree.write(f, 'utf-8') +tree.write(f, encoding='utf-8') \end{verbatim} -(Caution: the default encoding used for output is ASCII, which isn't -very useful for general XML work, raising an exception if there are -any characters with values greater than 127. You should always -specify a different encoding such as UTF-8 that can handle any Unicode -character.) +(Caution: the default encoding used for output is ASCII. For general +XML work, where an element's name may contain arbitrary Unicode +characters, ASCII isn't a very useful encoding because it will raise +an exception if an element's name contains any characters with values +greater than 127. Therefore, it's best to specify a different +encoding such as UTF-8 that can handle any Unicode character.) This section is only a partial description of the ElementTree interfaces. Please read the package's official documentation for more details. @@ -1673,7 +1814,7 @@ Please read the package's official documentation for more details. %====================================================================== -\subsection{The hashlib package} +\subsection{The hashlib package\label{module-hashlib}} A new \module{hashlib} module, written by Gregory P. Smith, has been added to replace the @@ -1721,7 +1862,7 @@ and \method{copy()} returns a new hashing object with the same digest state. 
%====================================================================== -\subsection{The sqlite3 package} +\subsection{The sqlite3 package\label{module-sqlite}} The pysqlite module (\url{http://www.pysqlite.org}), a wrapper for the SQLite embedded database, has been added to the standard library under @@ -1786,7 +1927,7 @@ c.execute("... where symbol = '%s'" % symbol) # Do this instead t = (symbol,) -c.execute('select * from stocks where symbol=?', ('IBM',)) +c.execute('select * from stocks where symbol=?', t) # Larger example for t in (('2006-03-28', 'BUY', 'IBM', 1000, 45.00), @@ -1835,7 +1976,7 @@ Marc-Andr\'e Lemburg.} % ====================================================================== -\section{Build and C API Changes} +\section{Build and C API Changes\label{build-api}} Changes to Python's build process and to the C API include: @@ -1901,6 +2042,22 @@ string of build information like this: \code{"trunk:45355:45356M, Apr 13 2006, 07:42:19"}. (Contributed by Barry Warsaw.) +\item Two new macros can be used to indicate C functions that are +local to the current file so that a faster calling convention can be +used. \cfunction{Py_LOCAL(\var{type})} declares the function as +returning a value of the specified \var{type} and uses a fast-calling +qualifier. \cfunction{Py_LOCAL_INLINE(\var{type})} does the same thing +and also requests the function be inlined. If +\cfunction{PY_LOCAL_AGGRESSIVE} is defined before \file{Python.h} is +included, a set of more aggressive optimizations is enabled for the +module; you should benchmark the results to find out if these +optimizations actually make the code faster. (Contributed by Fredrik +Lundh at the NeedForSpeed sprint.) + +\item \cfunction{PyErr_NewException(\var{name}, \var{base}, +\var{dict})} can now accept a tuple of base classes as its \var{base} +argument. (Contributed by Georg Brandl.) + \item The CPython interpreter is still written in C, but the code can now be compiled with a {\Cpp} compiler without errors. 
(Implemented by Anthony Baxter, Martin von~L\"owis, Skip Montanaro.) @@ -1913,7 +2070,7 @@ error checking. %====================================================================== -\subsection{Port-Specific Changes} +\subsection{Port-Specific Changes\label{ports}} \begin{itemize} @@ -1921,6 +2078,11 @@ error checking. now uses the \cfunction{dlopen()} function instead of MacOS-specific functions. +\item MacOS X: a \longprogramopt{enable-universalsdk} switch was added +to the \program{configure} script that compiles the interpreter as a +universal binary able to run on both PowerPC and Intel processors. +(Contributed by Ronald Oussoren.) + \item Windows: \file{.dll} is no longer supported as a filename extension for extension modules. \file{.pyd} is now the only filename extension that will be searched for. @@ -1977,7 +2139,7 @@ carefully test your C extension modules with Python 2.5. %====================================================================== -\section{Porting to Python 2.5} +\section{Porting to Python 2.5\label{porting}} This section lists previously described changes that may require changes to your code: @@ -2023,7 +2185,7 @@ freed with the corresponding family's \cfunction{*_Free()} function. The author would like to thank the following people for offering suggestions, corrections and assistance with various drafts of this -article: Phillip J. Eby, Kent Johnson, Martin von~L\"owis, Gustavo -Niemeyer, Mike Rovner, Thomas Wouters. +article: Phillip J. Eby, Kent Johnson, Martin von~L\"owis, Fredrik Lundh, +Gustavo Niemeyer, James Pryor, Mike Rovner, Scott Weikart, Thomas Wouters. \end{document} |