\section{\module{asynchat} ---
         Asynchronous socket command/response handler}

\declaremodule{standard}{asynchat}
\modulesynopsis{Support for asynchronous command/response protocols.}
\moduleauthor{Sam Rushing}{rushing@nightmare.com}
\sectionauthor{Steve Holden}{sholden@holdenweb.com}

This module builds on the \refmodule{asyncore} infrastructure,
simplifying asynchronous clients and servers and making it easier to
handle protocols whose elements are terminated by arbitrary strings, or
are of variable length. \refmodule{asynchat} defines the abstract class
\class{async_chat} that you subclass, providing implementations of the
\method{collect_incoming_data()} and \method{found_terminator()}
methods. It uses the same asynchronous loop as \refmodule{asyncore}, and
the two types of channel, \class{asyncore.dispatcher} and
\class{asynchat.async_chat}, can freely be mixed in the channel map.
Typically an \class{asyncore.dispatcher} server channel generates new
\class{asynchat.async_chat} channel objects as it receives incoming
connection requests.

\begin{classdesc}{async_chat}{}
  This class is an abstract subclass of \class{asyncore.dispatcher}. To make
  practical use of the code you must subclass \class{async_chat}, providing
  meaningful \method{collect_incoming_data()} and \method{found_terminator()}
  methods. The \class{asyncore.dispatcher} methods can be
  used, although not all make sense in a message/response context.

  Like \class{asyncore.dispatcher}, \class{async_chat} defines a set of events
  that are generated by an analysis of socket conditions after a
  \cfunction{select()} call. Once the polling loop has been started the
  \class{async_chat} object's methods are called by the event-processing
  framework with no action on the part of the programmer.

  Unlike \class{asyncore.dispatcher}, \class{async_chat} allows you to define
  a first-in-first-out queue (fifo) of \emph{producers}. A producer need have
  only one method, \method{more()}, which should return data to be transmitted
  on the channel. The producer indicates exhaustion (\emph{i.e.}\ that it contains
  no more data) by having its \method{more()} method return the empty string. At
  this point the \class{async_chat} object removes the producer from the fifo
  and starts using the next producer, if any. When the producer fifo is empty
  the \method{handle_write()} method does nothing. You use the channel object's
  \method{set_terminator()} method to describe how to recognize the end
  of, or an important breakpoint in, an incoming transmission from the
  remote endpoint.

  To build a functioning \class{async_chat} subclass your 
  input methods \method{collect_incoming_data()} and
  \method{found_terminator()} must handle the data that the channel receives
  asynchronously. The methods are described below.
\end{classdesc}

\begin{methoddesc}{close_when_done}{}
  Pushes a \code{None} on to the producer fifo. When this producer is
  popped off the fifo it causes the channel to be closed.
\end{methoddesc}

\begin{methoddesc}{collect_incoming_data}{data}
  Called with \var{data} holding an arbitrary amount of received data.
  The default method, which must be overridden, raises a \exception{NotImplementedError} exception.
\end{methoddesc}

\begin{methoddesc}{discard_buffers}{}
  In emergencies this method will discard any data held in the input and/or
  output buffers and the producer fifo.
\end{methoddesc}

\begin{methoddesc}{found_terminator}{}
  Called when the incoming data stream matches the termination condition
  set by \method{set_terminator()}. The default method, which must be overridden,
  raises a \exception{NotImplementedError} exception. The buffered input data should
  be available via an instance attribute.
\end{methoddesc}

\begin{methoddesc}{get_terminator}{}
  Returns the current terminator for the channel.
\end{methoddesc}

\begin{methoddesc}{handle_close}{}
  Called when the channel is closed. The default method silently closes
  the channel's socket.
\end{methoddesc}

\begin{methoddesc}{handle_read}{}
  Called when a read event fires on the channel's socket in the
  asynchronous loop. The default method checks for the termination
  condition established by \method{set_terminator()}, which can be either
  the appearance of a particular string in the input stream or the receipt
  of a particular number of characters. When the terminator is found,
  \method{handle_read()} calls the \method{found_terminator()} method after
  calling \method{collect_incoming_data()} with any data preceding the
  terminating condition.
\end{methoddesc}

\begin{methoddesc}{handle_write}{}
  Called when the application may write data to the channel.  
  The default method calls the \method{initiate_send()} method, which in turn
  will call \method{refill_buffer()} to collect data from the producer
  fifo associated with the channel.
\end{methoddesc}

\begin{methoddesc}{push}{data}
  Creates a \class{simple_producer} object (\emph{see below}) containing the data and
  pushes it on to the channel's \code{producer_fifo} to ensure its
  transmission. This is all you need to do to have the channel write
  the data out to the network, although it is possible to use your
  own producers in more complex schemes to implement encryption and
  chunking, for example.
\end{methoddesc}

\begin{methoddesc}{push_with_producer}{producer}
  Takes a producer object and adds it to the producer fifo associated with
  the channel. When all currently-pushed producers have been exhausted
  the channel will consume this producer's data by calling its
  \method{more()} method and send the data to the remote endpoint. 
\end{methoddesc}

\begin{methoddesc}{readable}{}
  Should return \code{True} for the channel to be included in the set of
  channels tested by the \cfunction{select()} loop for readability.
\end{methoddesc}

\begin{methoddesc}{refill_buffer}{}
  Refills the output buffer by calling the \method{more()} method of the
  producer at the head of the fifo. If it is exhausted then the
  producer is popped off the fifo and the next producer is activated.
  If the current producer is, or becomes, \code{None} then the channel
  is closed.
\end{methoddesc}

\begin{methoddesc}{set_terminator}{term}
  Sets the terminating condition to be recognized on the channel. \var{term}
  may be any of three types of value, corresponding to three different ways
  to handle incoming protocol data.

  \begin{tableii}{l|l}{}{term}{Description}
    \lineii{\emph{string}}{Will call \method{found_terminator()} when the
                string is found in the input stream}
    \lineii{\emph{integer}}{Will call \method{found_terminator()} when the
                indicated number of characters have been received}
    \lineii{\code{None}}{The channel continues to collect data forever}
  \end{tableii}

  Note that any data following the terminator will be available for reading by
  the channel after \method{found_terminator()} is called.
\end{methoddesc}

\begin{methoddesc}{writable}{}
  Should return \code{True} as long as items remain on the producer fifo,
  or the channel is connected and the channel's output buffer is non-empty.
\end{methoddesc}

\subsection{asynchat --- Auxiliary Classes and Functions}

\begin{classdesc}{simple_producer}{data\optional{, buffer_size=512}}
  A \class{simple_producer} takes a chunk of data and an optional buffer size.
  Repeated calls to its \method{more()} method yield successive chunks of the
  data no larger than \var{buffer_size}.
\end{classdesc}

\begin{methoddesc}{more}{}
  Produces the next chunk of information from the producer, or returns the empty string.
\end{methoddesc}

\begin{classdesc}{fifo}{\optional{list=None}}
  Each channel maintains a \class{fifo} holding data which has been pushed by the
  application but not yet popped for writing to the channel.
  A \class{fifo} is a list used to hold data and/or producers until they are required.
  If the \var{list} argument is provided then it should contain producers or
  data items to be written to the channel.
\end{classdesc}

\begin{methoddesc}{is_empty}{}
  Returns \code{True} if and only if the fifo is empty.
\end{methoddesc}

\begin{methoddesc}{first}{}
  Returns the least-recently \method{push()}ed item from the fifo.
\end{methoddesc}

\begin{methoddesc}{push}{data}
  Adds the given data (which may be a string or a producer object) to the
  producer fifo.
\end{methoddesc}

\begin{methoddesc}{pop}{}
  If the fifo is not empty, returns the tuple \code{(True, first())},
  deleting the popped item. Returns \code{(False, None)} for an empty fifo.
\end{methoddesc}

The \module{asynchat} module also defines one utility function, which may be
of use in network and textual analysis operations.

\begin{funcdesc}{find_prefix_at_end}{haystack, needle}
  Returns \code{True} if string \var{haystack} ends with any non-empty
  prefix of string \var{needle}.
\end{funcdesc}

\subsection{asynchat Example \label{asynchat-example}}

The following partial example shows how HTTP requests can be read with
\class{async_chat}. A web server might create an \class{http_request_handler} object for
each incoming client connection. Notice that initially the
channel terminator is set to match the blank line at the end of the HTTP
headers, and a flag indicates that the headers are being read.

Once the headers have been read, if the request is of type POST
(indicating that further data are present in the input stream) then the
\code{Content-Length:} header is used to set a numeric terminator to
read the right amount of data from the channel.

The \method{handle_request()} method is called once all relevant input
has been marshalled, after setting the channel terminator to \code{None}
to ensure that any extraneous data sent by the web client are ignored.

\begin{verbatim}
class http_request_handler(asynchat.async_chat):
    """Collect one HTTP request arriving on a client connection.

    The channel first reads up to the blank line that ends the HTTP
    headers.  For a POST request it then reads the number of characters
    announced by the Content-Length header.  Once all relevant input has
    been collected, handle_request() is called.

    This is a partial example: parse_headers(), handle_request() and the
    module-level parse() function are not shown here.
    """

    def __init__(self, conn, addr, sessions, log):
        # Pass the socket positionally so the call does not depend on the
        # name of async_chat's first constructor parameter.
        asynchat.async_chat.__init__(self, conn)
        self.addr = addr
        self.sessions = sessions
        self.ibuffer = []
        self.obuffer = ""
        # Start by reading up to the blank line that ends the headers.
        self.set_terminator("\r\n\r\n")
        self.reading_headers = True
        self.handling = False
        self.cgi_data = None
        self.log = log

    def collect_incoming_data(self, data):
        """Buffer the data"""
        self.ibuffer.append(data)

    def found_terminator(self):
        """React to the end of the headers or the end of the request body."""
        if self.reading_headers:
            self.reading_headers = False
            # parse_headers() is not shown; presumably it sets self.op and
            # self.headers, which are read below.
            self.parse_headers("".join(self.ibuffer))
            self.ibuffer = []
            if self.op.upper() == "POST":
                # A numeric terminator makes the channel read exactly
                # that many characters of request body.
                clen = self.headers.getheader("content-length")
                self.set_terminator(int(clen))
            else:
                self.handling = True
                self.set_terminator(None)
                self.handle_request()
        elif not self.handling:
            self.set_terminator(None) # browsers sometimes over-send
            # parse() is not shown in this partial example.
            self.cgi_data = parse(self.headers, "".join(self.ibuffer))
            self.handling = True
            self.ibuffer = []
            self.handle_request()
\end{verbatim}