summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSteve Holden <steve@holdenweb.com>2002-07-03 18:36:39 (GMT)
committerSteve Holden <steve@holdenweb.com>2002-07-03 18:36:39 (GMT)
commitb1af86a1d7ada842ede1d5ea02a9a04f3939b8ca (patch)
treecbc4c839a75285da08a7fdb71e646b108812dd50
parentdf872a20525dbc69e398a1a30ce416327eed8b0b (diff)
downloadcpython-b1af86a1d7ada842ede1d5ea02a9a04f3939b8ca.zip
cpython-b1af86a1d7ada842ede1d5ea02a9a04f3939b8ca.tar.gz
cpython-b1af86a1d7ada842ede1d5ea02a9a04f3939b8ca.tar.bz2
Revise asyncore documentation and document asynchat for the first time.
-rw-r--r--Doc/Makefile.deps1
-rw-r--r--Doc/lib/lib.tex1
-rw-r--r--Doc/lib/libasynchat.tex254
-rw-r--r--Doc/lib/libasyncore.tex146
4 files changed, 347 insertions, 55 deletions
diff --git a/Doc/Makefile.deps b/Doc/Makefile.deps
index 29a6bc1..7693c21 100644
--- a/Doc/Makefile.deps
+++ b/Doc/Makefile.deps
@@ -315,6 +315,7 @@ LIBFILES= $(MANSTYLES) $(INDEXSTYLES) $(COMMONTEX) \
lib/libstatvfs.tex \
lib/libtty.tex \
lib/libasyncore.tex \
+ lib/libasynchat.tex \
lib/libatexit.tex \
lib/libmmap.tex \
lib/tkinter.tex \
diff --git a/Doc/lib/lib.tex b/Doc/lib/lib.tex
index 70b1c93..d87a1cf 100644
--- a/Doc/lib/lib.tex
+++ b/Doc/lib/lib.tex
@@ -217,6 +217,7 @@ and how to embed it in other applications.
\input{libxmlrpclib}
\input{libsimplexmlrpc}
\input{libasyncore}
+\input{libasynchat}
\input{netdata} % Internet Data Handling
\input{libformatter}
diff --git a/Doc/lib/libasynchat.tex b/Doc/lib/libasynchat.tex
new file mode 100644
index 0000000..eea4f18
--- /dev/null
+++ b/Doc/lib/libasynchat.tex
@@ -0,0 +1,254 @@
+\section{\module{asynchat} ---
+ Asynchronous socket command/response handler}
+
+\declaremodule{standard}{asynchat}
+\modulesynopsis{Support for asynchronous command/response protocols.}
+\moduleauthor{Sam Rushing}{rushing@nightmare.com}
+\sectionauthor{Steve Holden}{sholden@holdenweb.com}
+
+This module builds on the \refmodule{asyncore} infrastructure,
+simplifying asynchronous clients and servers and making it easier to
+handle protocols whose elements are terminated by arbitrary strings, or
+are of variable length. \refmodule{asynchat} defines the abstract class
+\class{async_chat} that you subclass, providing implementations of the
+\method{collect_incoming_data()} and \method{found_terminator()}
+methods. It uses the same asynchronous loop as \refmodule{asyncore}, and
+the two types of channel, \class{asyncore.dispatcher} and
+\class{asynchat.async_chat}, can freely be mixed in the channel map.
+Typically an \class{asyncore.dispatcher} server channel generates new
+\class{asynchat.async_chat} channel objects as it receives incoming
+connection requests.
+
+\begin{classdesc}{async_chat}{}
+ This class is an abstract subclass of \class{asyncore.dispatcher}. To make
+ practical use of the code you must subclass \class{async_chat}, providing
+ meaningful \method{collect_incoming_data()} and \method{found_terminator()}
+ methods. The \class{asyncore.dispatcher} methods can be
+ used, although not all make sense in a message/response context.
+
+ Like \class{asyncore.dispatcher}, \class{async_chat} defines a set of events
+ that are generated by an analysis of socket conditions after a
+ \cfunction{select()} call. Once the polling loop has been started the
+ \class{async_chat} object's methods are called by the event-processing
+ framework with no action on the part of the programmer.
+
+ Unlike \class{asyncore.dispatcher}, \class{async_chat} allows you to define
+ a first-in-first-out queue (fifo) of \emph{producers}. A producer need have
+ only one method, \method{more()}, which should return data to be transmitted
+ on the channel. The producer indicates exhaustion (\emph{i.e.} that it contains
+ no more data) by having its \method{more()} method return the empty string. At
+ this point the \class{async_chat} object removes the producer from the fifo
+ and starts using the next producer, if any. When the producer fifo is empty
+ the \method{handle_write()} method does nothing. You use the channel object's
+ \method{set_terminator()} method to describe how to recognize the end
+ of, or an important breakpoint in, an incoming transmission from the
+ remote endpoint.
+
+ To build a functioning \class{async_chat} subclass your
+ input methods \method{collect_incoming_data()} and
+ \method{found_terminator()} must handle the data that the channel receives
+ asynchronously. The methods are described below.
+\end{classdesc}
+
+\begin{methoddesc}{close_when_done}{}
+ Pushes a \code{None} on to the producer fifo. When this producer is
+ popped off the fifo it causes the channel to be closed.
+\end{methoddesc}
+
+\begin{methoddesc}{collect_incoming_data}{data}
+ Called with \var{data} holding an arbitrary amount of received data.
+ The default method, which must be overridden, raises a \exception{NotImplementedError} exception.
+\end{methoddesc}
+
+\begin{methoddesc}{discard_buffers}{}
+ In emergencies this method will discard any data held in the input and/or
+ output buffers and the producer fifo.
+\end{methoddesc}
+
+\begin{methoddesc}{found_terminator}{}
+ Called when the incoming data stream matches the termination condition
+ set by \method{set_terminator()}. The default method, which must be overridden,
+ raises a \exception{NotImplementedError} exception. The buffered input data should
+ be available via an instance attribute.
+\end{methoddesc}
+
+\begin{methoddesc}{get_terminator}{}
+ Returns the current terminator for the channel.
+\end{methoddesc}
+
+\begin{methoddesc}{handle_close}{}
+ Called when the channel is closed. The default method silently closes
+ the channel's socket.
+\end{methoddesc}
+
+\begin{methoddesc}{handle_read}{}
+ Called when a read event fires on the channel's socket in the
+ asynchronous loop. The default method checks for the termination
+ condition established by \method{set_terminator()}, which can be either
+ the appearance of a particular string in the input stream or the receipt
+ of a particular number of characters. When the terminator is found,
+ \method{handle_read()} calls the \method{found_terminator()} method after
+ calling \method{collect_incoming_data()} with any data preceding the
+ terminating condition.
+\end{methoddesc}
+
+\begin{methoddesc}{handle_write}{}
+ Called when the application may write data to the channel.
+ The default method calls the \method{initiate_send()} method, which in turn
+ will call \method{refill_buffer()} to collect data from the producer
+ fifo associated with the channel.
+\end{methoddesc}
+
+\begin{methoddesc}{push}{data}
+ Creates a \class{simple_producer} object (\emph{see below}) containing the data and
+ pushes it on to the channel's \code{producer_fifo} to ensure its
+ transmission. This is all you need to do to have the channel write
+ the data out to the network, although it is possible to use your
+ own producers in more complex schemes to implement encryption and
+ chunking, for example.
+\end{methoddesc}
+
+\begin{methoddesc}{push_with_producer}{producer}
+ Takes a producer object and adds it to the producer fifo associated with
+ the channel. When all currently-pushed producers have been exhausted
+ the channel will consume this producer's data by calling its
+ \method{more()} method and send the data to the remote endpoint.
+\end{methoddesc}
+
+\begin{methoddesc}{readable}{}
+ Should return \code{True} for the channel to be included in the set of
+ channels tested by the \cfunction{select()} loop for readability.
+\end{methoddesc}
+
+\begin{methoddesc}{refill_buffer}{}
+ Refills the output buffer by calling the \method{more()} method of the
+ producer at the head of the fifo. If it is exhausted then the
+ producer is popped off the fifo and the next producer is activated.
+ If the current producer is, or becomes, \code{None} then the channel
+ is closed.
+\end{methoddesc}
+
+\begin{methoddesc}{set_terminator}{term}
+ Sets the terminating condition to be recognized on the channel. \var{term}
+ may be any of three types of value, corresponding to three different ways
+ to handle incoming protocol data.
+
+ \begin{tableii}{l|l}{}{term}{Description}
+ \lineii{\emph{string}}{Will call \method{found_terminator()} when the
+ string is found in the input stream}
+ \lineii{\emph{integer}}{Will call \method{found_terminator()} when the
+ indicated number of characters have been received}
+ \lineii{\code{None}}{The channel continues to collect data forever}
+ \end{tableii}
+
+ Note that any data following the terminator will be available for reading by
+ the channel after \method{found_terminator()} is called.
+\end{methoddesc}
+
+\begin{methoddesc}{writable}{}
+ Should return \code{True} as long as items remain on the producer fifo,
+ or the channel is connected and the channel's output buffer is non-empty.
+\end{methoddesc}
+
+\subsection{asynchat --- Auxiliary Classes and Functions}
+
+\begin{classdesc}{simple_producer}{data\optional{, buffer_size=512}}
+ A \class{simple_producer} takes a chunk of data and an optional buffer size.
+ Repeated calls to its \method{more()} method yield successive chunks of the
+ data no larger than \var{buffer_size}.
+\end{classdesc}
+
+\begin{methoddesc}{more}{}
+ Produces the next chunk of information from the producer, or returns the empty string.
+\end{methoddesc}
+
+\begin{classdesc}{fifo}{\optional{list=None}}
+ Each channel maintains a \class{fifo} holding data which has been pushed by the
+ application but not yet popped for writing to the channel.
+ A \class{fifo} is a list used to hold data and/or producers until they are required.
+ If the \var{list} argument is provided then it should contain producers or
+ data items to be written to the channel.
+\end{classdesc}
+
+\begin{methoddesc}{is_empty}{}
+ Returns \code{True} if and only if the fifo is empty.
+\end{methoddesc}
+
+\begin{methoddesc}{first}{}
+ Returns the least-recently \method{push()}ed item from the fifo.
+\end{methoddesc}
+
+\begin{methoddesc}{push}{data}
+ Adds the given data (which may be a string or a producer object) to the
+ producer fifo.
+\end{methoddesc}
+
+\begin{methoddesc}{pop}{}
+ If the fifo is not empty, returns \code{True, first()}, deleting the popped
+ item. Returns \code{False, None} for an empty fifo.
+\end{methoddesc}
+
+The \module{asynchat} module also defines one utility function, which may be
+of use in network and textual analysis operations.
+
+\begin{funcdesc}{find_prefix_at_end}{haystack, needle}
+ Returns \code{True} if string \var{haystack} ends with any non-empty
+ prefix of string \var{needle}.
+\end{funcdesc}
+
+\subsection{asynchat Example \label{asynchat-example}}
+
+The following partial example shows how HTTP requests can be read with
+\class{async_chat}. A web server might create an \class{http_request_handler} object for
+each incoming client connection. Notice that initially the
+channel terminator is set to match the blank line at the end of the HTTP
+headers, and a flag indicates that the headers are being read.
+
+Once the headers have been read, if the request is of type POST
+(indicating that further data are present in the input stream) then the
+\code{Content-Length:} header is used to set a numeric terminator to
+read the right amount of data from the channel.
+
+The \method{handle_request()} method is called once all relevant input
+has been marshalled, after setting the channel terminator to \code{None}
+to ensure that any extraneous data sent by the web client are ignored.
+
+\begin{verbatim}
+class http_request_handler(asynchat.async_chat):
+
+ def __init__(self, conn, addr, sessions, log):
+ asynchat.async_chat.__init__(self, conn=conn)
+ self.addr = addr
+ self.sessions = sessions
+ self.ibuffer = []
+ self.obuffer = ""
+ self.set_terminator("\r\n\r\n")
+ self.reading_headers = True
+ self.handling = False
+ self.cgi_data = None
+ self.log = log
+
+ def collect_incoming_data(self, data):
+ """Buffer the data"""
+ self.ibuffer.append(data)
+
+ def found_terminator(self):
+ if self.reading_headers:
+ self.reading_headers = False
+ self.parse_headers("".join(self.ibuffer))
+ self.ibuffer = []
+ if self.op.upper() == "POST":
+ clen = self.headers.getheader("content-length")
+ self.set_terminator(int(clen))
+ else:
+ self.handling = True
+ self.set_terminator(None)
+ self.handle_request()
+ elif not self.handling:
+ self.set_terminator(None) # browsers sometimes over-send
+ self.cgi_data = parse(self.headers, "".join(self.ibuffer))
+ self.handling = True
+ self.ibuffer = []
+ self.handle_request()
+\end{verbatim}
+
diff --git a/Doc/lib/libasyncore.tex b/Doc/lib/libasyncore.tex
index a85998e..be19295 100644
--- a/Doc/lib/libasyncore.tex
+++ b/Doc/lib/libasyncore.tex
@@ -6,6 +6,7 @@
handling services.}
\moduleauthor{Sam Rushing}{rushing@nightmare.com}
\sectionauthor{Christopher Petrilli}{petrilli@amber.org}
+\sectionauthor{Steve Holden}{sholden@holdenweb.com}
% Heavily adapted from original documentation by Sam Rushing.
This module provides the basic infrastructure for writing asynchronous
@@ -26,35 +27,21 @@ multiple communication channels at once; doing other work while your
I/O is taking place in the ``background.'' Although this strategy can
seem strange and complex, especially at first, it is in many ways
easier to understand and control than multi-threaded programming.
-The module documented here solves many of the difficult problems for
+The \module{asyncore} module solves many of the difficult problems for
you, making the task of building sophisticated high-performance
-network servers and clients a snap.
+network servers and clients a snap. For ``conversational'' applications
+and protocols the companion \refmodule{asynchat} module is invaluable.
-\begin{classdesc}{dispatcher}{}
- The first class we will introduce is the \class{dispatcher} class.
- This is a thin wrapper around a low-level socket object. To make
- it more useful, it has a few methods for event-handling on it.
- Otherwise, it can be treated as a normal non-blocking socket object.
+The basic idea behind both modules is to create one or more network
+\emph{channels}, instances of class \class{asyncore.dispatcher} and
+\class{asynchat.async_chat}. Creating the channels adds them to a global
+map, used by the \function{loop()} function if you do not provide it
+with your own \var{map}.
- The direct interface between the select loop and the socket object
- are the \method{handle_read_event()} and
- \method{handle_write_event()} methods. These are called whenever an
- object `fires' that event.
-
- The firing of these low-level events can tell us whether certain
- higher-level events have taken place, depending on the timing and
- the state of the connection. For example, if we have asked for a
- socket to connect to another host, we know that the connection has
- been made when the socket fires a write event (at this point you
- know that you may write to it with the expectation of success).
- The implied higher-level events are:
-
- \begin{tableii}{l|l}{code}{Event}{Description}
- \lineii{handle_connect()}{Implied by a write event}
- \lineii{handle_close()}{Implied by a read event with no data available}
- \lineii{handle_accept()}{Implied by a read event on a listening socket}
- \end{tableii}
-\end{classdesc}
+Once the initial channels have been created, calling the \function{loop()}
+function activates channel service, which continues until the last
+channel (including any that have been added to the map during asynchronous
+service) is closed.
\begin{funcdesc}{loop}{\optional{timeout\optional{, use_poll\optional{,
map}}}}
@@ -64,21 +51,67 @@ network servers and clients a snap.
\function{select()} or \function{poll()} call, measured in seconds;
the default is 30 seconds. The \var{use_poll} parameter, if true,
indicates that \function{poll()} should be used in preference to
- \function{select()} (the default is false). The \var{map} parameter
- is a dictionary that gives a list of channels to watch. As channels
+ \function{select()} (the default is \code{False}). The \var{map} parameter
+ is a dictionary whose items are the channels to watch. As channels
are closed they are deleted from their map. If \var{map} is
- omitted, a global map is used.
+ omitted, a global map is used (this map is updated by the default
+ class \method{__init__()}
+ -- make sure you extend, rather than override, \method{__init__()}
+ if you want to retain this behavior).
+
+ Channels (instances of \class{asyncore.dispatcher}, \class{asynchat.async_chat}
+ and subclasses thereof) can freely be mixed in the map.
\end{funcdesc}
-This set of user-level events is larger than the basics. The
-full set of methods that can be overridden in your subclass are:
+\begin{classdesc}{dispatcher}{}
+ The \class{dispatcher} class is a thin wrapper around a low-level socket object.
+ To make it more useful, it has a few methods for event-handling which are called
+ from the asynchronous loop.
+ Otherwise, it can be treated as a normal non-blocking socket object.
+
+ Two class attributes can be modified, to improve performance,
+ or possibly even to conserve memory.
+
+ \begin{datadesc}{ac_in_buffer_size}
+ The asynchronous input buffer size (default \code{4096}).
+ \end{datadesc}
+
+ \begin{datadesc}{ac_out_buffer_size}
+ The asynchronous output buffer size (default \code{4096}).
+ \end{datadesc}
+
+ The firing of low-level events at certain times or in certain connection
+ states tells the asynchronous loop that certain higher-level events have
+ taken place. For example, if we have asked for a socket to connect to
+ another host, we know that the connection has been made when the socket
+ becomes writable for the first time (at this point you know that you may
+ write to it with the expectation of success). The implied higher-level
+ events are:
+
+ \begin{tableii}{l|l}{code}{Event}{Description}
+ \lineii{handle_connect()}{Implied by the first write event}
+ \lineii{handle_close()}{Implied by a read event with no data available}
+ \lineii{handle_accept()}{Implied by a read event on a listening socket}
+ \end{tableii}
+
+ During asynchronous processing, each mapped channel's \method{readable()}
+ and \method{writable()} methods are used to determine whether the channel's
+ socket should be added to the list of channels \cfunction{select()}ed or
+ \cfunction{poll()}ed for read and write events.
+
+\end{classdesc}
+
+Thus, the set of channel events is larger than the basic socket events.
+The full set of methods that can be overridden in your subclass follows:
\begin{methoddesc}{handle_read}{}
- Called when there is new data to be read from a socket.
+ Called when the asynchronous loop detects that a \method{recv()}
+ call on the channel's socket will succeed.
\end{methoddesc}
\begin{methoddesc}{handle_write}{}
- Called when there is an attempt to write data to the object.
+ Called when the asynchronous loop detects that a writable socket
+ can be written.
Often this method will implement the necessary buffering for
performance. For example:
@@ -96,9 +129,9 @@ def handle_write(self):
\end{methoddesc}
\begin{methoddesc}{handle_connect}{}
- Called when the socket actually makes a connection. This
- might be used to send a ``welcome'' banner, or something
- similar.
+ Called when the active opener's socket actually makes a connection.
+ Might send a ``welcome'' banner, or initiate a protocol
+ negotiation with the remote endpoint, for example.
\end{methoddesc}
\begin{methoddesc}{handle_close}{}
@@ -111,28 +144,29 @@ def handle_write(self):
\end{methoddesc}
\begin{methoddesc}{handle_accept}{}
- Called on listening sockets when they actually accept a new
- connection.
+ Called on listening channels (passive openers) when a
+ connection can be established with a new remote endpoint that
+ has issued a \method{connect()} call for the local endpoint.
\end{methoddesc}
\begin{methoddesc}{readable}{}
- Each time through the \method{select()} loop, the set of sockets
- is scanned, and this method is called to see if there is any
- interest in reading. The default method simply returns \code{True},
- indicating that by default, all channels will be interested.
+ Called each time around the asynchronous loop to determine whether a
+ channel's socket should be added to the list on which read events can
+ occur. The default method simply returns \code{True},
+ indicating that by default, all channels will be interested in
+ read events.
\end{methoddesc}
\begin{methoddesc}{writable}{}
- Each time through the \method{select()} loop, the set of sockets
- is scanned, and this method is called to see if there is any
- interest in writing. The default method simply returns \code{True},
- indicating that by default, all channels will be interested.
+ Called each time around the asynchronous loop to determine whether a
+ channel's socket should be added to the list on which write events can
+ occur. The default method simply returns \code{True},
+ indicating that by default, all channels will be interested in
+ write events.
\end{methoddesc}
-In addition, there are the basic methods needed to construct and
-manipulate ``channels,'' which are what we will call the socket
-connections in this context. Note that most of these are nearly
-identical to their socket partners.
+In addition, each channel delegates or extends many of the socket methods.
+Most of these are nearly identical to their socket partners.
\begin{methoddesc}{create_socket}{family, type}
This is identical to the creation of a normal socket, and
@@ -144,15 +178,17 @@ identical to their socket partners.
\begin{methoddesc}{connect}{address}
As with the normal socket object, \var{address} is a
tuple with the first element the host to connect to, and the
- second the port.
+ second the port number.
\end{methoddesc}
\begin{methoddesc}{send}{data}
- Send \var{data} out the socket.
+ Send \var{data} to the remote end-point of the socket.
\end{methoddesc}
\begin{methoddesc}{recv}{buffer_size}
- Read at most \var{buffer_size} bytes from the socket.
+ Read at most \var{buffer_size} bytes from the socket's remote end-point.
+ An empty string implies that the channel has been closed from the other
+ end.
\end{methoddesc}
\begin{methoddesc}{listen}{backlog}
@@ -179,13 +215,13 @@ identical to their socket partners.
\begin{methoddesc}{close}{}
Close the socket. All future operations on the socket object
- will fail. The remote end will receive no more data (after
+ will fail. The remote end-point will receive no more data (after
queued data is flushed). Sockets are automatically closed
when they are garbage-collected.
\end{methoddesc}
-\subsection{Example basic HTTP client \label{asyncore-example}}
+\subsection{asyncore Example basic HTTP client \label{asyncore-example}}
As a basic example, below is a very basic HTTP client that uses the
\class{dispatcher} class to implement its socket handling: