BaseHTTPServer.py 20.8 KB
Newer Older
Guido van Rossum's avatar
Guido van Rossum committed
1 2 3 4
"""HTTP server base class.

Note: the handler class in this module doesn't implement any HTTP request
method; see SimpleHTTPServer for simple implementations of GET, HEAD and POST
(including CGI scripts).  It does, however, optionally implement HTTP/1.1
persistent connections, as of version 0.3.

Contents:

- HTTPServer: TCP server class that records its own host name and port
- BaseHTTPRequestHandler: HTTP request handler base class
- test: test function

XXX To do:

- log requests even later (to capture byte count)
- log user-agent header and other interesting goodies
- send error log to separate file
"""


# See also:
#
# HTTP Working Group                                        T. Berners-Lee
# INTERNET-DRAFT                                            R. T. Fielding
# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
# Expires September 8, 1995                                  March 8, 1995
#
# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
29 30 31 32 33 34
#
# and
#
# Network Working Group                                      R. Fielding
# Request for Comments: 2616                                       et al
# Obsoletes: 2068                                              June 1999
Tim Peters's avatar
Tim Peters committed
35
# Category: Standards Track
36 37
#
# URL: http://www.faqs.org/rfcs/rfc2616.html
Guido van Rossum's avatar
Guido van Rossum committed
38 39 40

# Log files
# ---------
Tim Peters's avatar
Tim Peters committed
41
#
Guido van Rossum's avatar
Guido van Rossum committed
42
# Here's a quote from the NCSA httpd docs about log file format.
Tim Peters's avatar
Tim Peters committed
43 44 45 46 47 48
#
# | The logfile format is as follows. Each line consists of:
# |
# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
# |
# |        host: Either the DNS name or the IP number of the remote client
Guido van Rossum's avatar
Guido van Rossum committed
49
# |        rfc931: Any information returned by identd for this person,
Tim Peters's avatar
Tim Peters committed
50
# |                - otherwise.
Guido van Rossum's avatar
Guido van Rossum committed
51
# |        authuser: If user sent a userid for authentication, the user name,
Tim Peters's avatar
Tim Peters committed
52 53 54 55 56 57 58 59 60
# |                  - otherwise.
# |        DD: Day
# |        Mon: Month (calendar name)
# |        YYYY: Year
# |        hh: hour (24-hour format, the machine's timezone)
# |        mm: minutes
# |        ss: seconds
# |        request: The first line of the HTTP request as sent by the client.
# |        ddd: the status code returned by the server, - if not available.
Guido van Rossum's avatar
Guido van Rossum committed
61
# |        bbbb: the total number of bytes sent,
Tim Peters's avatar
Tim Peters committed
62 63
# |              *not including the HTTP/1.0 header*, - if not available
# |
Guido van Rossum's avatar
Guido van Rossum committed
64
# | You can determine the name of the file accessed through request.
Tim Peters's avatar
Tim Peters committed
65
#
Guido van Rossum's avatar
Guido van Rossum committed
66 67 68
# (Actually, the latter is only true if you know the server configuration
# at the time the request was made!)

69
# Version of this module; it is embedded in the default server_version
# string of BaseHTTPRequestHandler.
__version__ = "0.3"

# Names exported by "from BaseHTTPServer import *".
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
Guido van Rossum's avatar
Guido van Rossum committed
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91

import sys
import time
import socket # For gethostbyaddr()
import mimetools
import SocketServer

# Default error message template.  It is filled in with the % operator
# from a dict supplying the keys 'code', 'message' and 'explain'
# (see BaseHTTPRequestHandler.send_error).
DEFAULT_ERROR_MESSAGE = """\
<head>
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
"""

92 93
def _quote_html(html):
    return html.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
Guido van Rossum's avatar
Guido van Rossum committed
94 95 96

class HTTPServer(SocketServer.TCPServer):
    """TCP server that records the host name and port it is bound to."""

    allow_reuse_address = 1    # Seems to make sense in testing environment

    def server_bind(self):
        """Bind the socket, then store the server name and port.

        After the base class has bound the socket, the locally bound
        address is read back; the fully qualified form of its host part
        is kept in self.server_name and its port in self.server_port.
        """
        SocketServer.TCPServer.server_bind(self)
        bound_address = self.socket.getsockname()
        host, port = bound_address[:2]
        self.server_name = socket.getfqdn(host)
        self.server_port = port
Guido van Rossum's avatar
Guido van Rossum committed
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131


class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
132 133 134
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).
Guido van Rossum's avatar
Guido van Rossum committed
135

136 137 138 139 140
    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).
Guido van Rossum's avatar
Guido van Rossum committed
141 142 143 144 145 146 147 148 149 150 151 152

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

153
    The reply form of the HTTP 1.x protocol again has three parts:
Guido van Rossum's avatar
Guido van Rossum committed
154 155 156 157 158 159 160 161 162 163 164

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

165
    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
Guido van Rossum's avatar
Guido van Rossum committed
166 167 168 169 170 171
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
172
    a request SPAM will be handled by a method do_SPAM().  If no
Guido van Rossum's avatar
Guido van Rossum committed
173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and version are the broken-down request line;

    - headers is an instance of mimetools.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    # It forms the second half of the string built by version_string().
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    # It forms the first half of the string built by version_string().
    server_version = "BaseHTTP/" + __version__

219 220
    def parse_request(self):
        """Parse a request (internal).
221

222
        The request should be stored in self.raw_requestline; the results
223 224
        are in self.command, self.path, self.request_version and
        self.headers.
225

226
        Return True for success, False for failure; on failure, an
227
        error is sent back.
228

229
        """
230
        self.command = None  # set in case of error on the first line
231
        self.request_version = version = "HTTP/0.9" # Default
232
        self.close_connection = 1
233 234 235 236 237 238
        requestline = self.raw_requestline
        if requestline[-2:] == '\r\n':
            requestline = requestline[:-2]
        elif requestline[-1:] == '\n':
            requestline = requestline[:-1]
        self.requestline = requestline
239
        words = requestline.split()
240 241 242
        if len(words) == 3:
            [command, path, version] = words
            if version[:5] != 'HTTP/':
243
                self.send_error(400, "Bad request version (%r)" % version)
244
                return False
245
            try:
246 247 248 249 250 251 252 253 254 255 256 257
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
258
                self.send_error(400, "Bad request version (%r)" % version)
259
                return False
260
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
261
                self.close_connection = 0
262
            if version_number >= (2, 0):
263
                self.send_error(505,
264
                          "Invalid HTTP Version (%s)" % base_version_number)
265
                return False
266 267
        elif len(words) == 2:
            [command, path] = words
268
            self.close_connection = 1
269 270
            if command != 'GET':
                self.send_error(400,
271
                                "Bad HTTP/0.9 request type (%r)" % command)
272
                return False
273
        elif not words:
274
            return False
275
        else:
276
            self.send_error(400, "Bad request syntax (%r)" % requestline)
277
            return False
278
        self.command, self.path, self.request_version = command, path, version
279 280

        # Examine the headers and look for a Connection directive
281
        self.headers = self.MessageClass(self.rfile, 0)
282 283 284 285 286 287 288

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = 1
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = 0
289
        return True
290

291
    def handle_one_request(self):
292 293 294 295 296 297 298 299
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        self.raw_requestline = self.rfile.readline()
300 301 302
        if not self.raw_requestline:
            self.close_connection = 1
            return
303 304 305
        if not self.parse_request(): # An error code has been sent, just exit
            return
        mname = 'do_' + self.command
306
        if not hasattr(self, mname):
307
            self.send_error(501, "Unsupported method (%r)" % self.command)
308 309 310
            return
        method = getattr(self, mname)
        method()
Guido van Rossum's avatar
Guido van Rossum committed
311

312 313 314 315 316 317 318 319
    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = 1

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

Guido van Rossum's avatar
Guido van Rossum committed
320
    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry matching the
        response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.  The reply
        carries a "Connection: close" header.

        """
        try:
            short_msg, long_msg = self.responses[code]
        except KeyError:
            short_msg = long_msg = '???'
        if message is None:
            message = short_msg
        self.log_error("code %d, message %s", code, message)
        # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)
        fields = {
            'code': code,
            'message': _quote_html(message),
            'explain': long_msg,
        }
        content = self.error_message_format % fields
        self.send_response(code, message)
        self.send_header("Content-Type", "text/html")
        self.send_header('Connection', 'close')
        self.end_headers()
        # No body for HEAD requests or for codes below 200, 204 and 304.
        if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
            self.wfile.write(content)
Guido van Rossum's avatar
Guido van Rossum committed
350 351 352 353

    # Template for the HTML body produced by send_error(); it is filled in
    # with the % operator from a dict holding 'code', 'message' and
    # 'explain'.
    error_message_format = DEFAULT_ERROR_MESSAGE

    def send_response(self, code, message=None):
354 355 356 357 358 359 360 361
        """Send the response header and log the response code.

        Also send two standard headers with the server software
        version and the current date.

        """
        self.log_request(code)
        if message is None:
362
            if code in self.responses:
363 364 365 366
                message = self.responses[code][0]
            else:
                message = ''
        if self.request_version != 'HTTP/0.9':
367 368 369
            self.wfile.write("%s %d %s\r\n" %
                             (self.protocol_version, code, message))
            # print (self.protocol_version, code, message)
370 371
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())
Guido van Rossum's avatar
Guido van Rossum committed
372 373

    def send_header(self, keyword, value):
374 375 376
        """Send a MIME header."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s: %s\r\n" % (keyword, value))
Guido van Rossum's avatar
Guido van Rossum committed
377

378 379 380 381 382 383
        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = 1
            elif value.lower() == 'keep-alive':
                self.close_connection = 0

Guido van Rossum's avatar
Guido van Rossum committed
384
    def end_headers(self):
385 386 387
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("\r\n")
Guido van Rossum's avatar
Guido van Rossum committed
388 389

    def log_request(self, code='-', size='-'):
390
        """Log an accepted request.
Guido van Rossum's avatar
Guido van Rossum committed
391

392
        This is called by send_reponse().
Guido van Rossum's avatar
Guido van Rossum committed
393

394
        """
Guido van Rossum's avatar
Guido van Rossum committed
395

396 397
        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))
Guido van Rossum's avatar
Guido van Rossum committed
398 399

    def log_error(self, *args):
400
        """Log an error.
Guido van Rossum's avatar
Guido van Rossum committed
401

402 403
        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().
Guido van Rossum's avatar
Guido van Rossum committed
404

405
        Arguments are the same as for log_message().
Guido van Rossum's avatar
Guido van Rossum committed
406

407
        XXX This should go to the separate error log.
Guido van Rossum's avatar
Guido van Rossum committed
408

409
        """
Guido van Rossum's avatar
Guido van Rossum committed
410

411
        self.log_message(*args)
Guido van Rossum's avatar
Guido van Rossum committed
412 413

    def log_message(self, format, *args):
414
        """Log an arbitrary message.
Guido van Rossum's avatar
Guido van Rossum committed
415

416 417
        This is used by all other logging functions.  Override
        it if you have specific logging wishes.
Guido van Rossum's avatar
Guido van Rossum committed
418

419 420 421 422 423
        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).
Guido van Rossum's avatar
Guido van Rossum committed
424

425 426
        The client host and current date/time are prefixed to
        every message.
Guido van Rossum's avatar
Guido van Rossum committed
427

428
        """
Guido van Rossum's avatar
Guido van Rossum committed
429

430 431 432 433
        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          format%args))
Guido van Rossum's avatar
Guido van Rossum committed
434 435

    def version_string(self):
436 437
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version
Guido van Rossum's avatar
Guido van Rossum committed
438 439

    def date_time_string(self):
440 441 442 443 444 445 446 447
        """Return the current date and time formatted for a message header."""
        now = time.time()
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(now)
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
                self.weekdayname[wd],
                day, self.monthname[month], year,
                hh, mm, ss)
        return s
Guido van Rossum's avatar
Guido van Rossum committed
448 449

    def log_date_time_string(self):
450 451 452 453 454 455
        """Return the current time formatted for logging."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s
Guido van Rossum's avatar
Guido van Rossum committed
456 457 458 459

    # Abbreviated weekday names, indexed by the weekday field of a time
    # tuple (see date_time_string); the list starts with Monday.
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Abbreviated month names, indexed directly by the month field of a
    # time tuple; index 0 is a None placeholder.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
Guido van Rossum's avatar
Guido van Rossum committed
462 463

    def address_string(self):
        """Return the client address formatted for logging.

        The host part of self.client_address is handed to
        socket.getfqdn(), whose result is returned; the port part is
        ignored.

        """
        host, _port = self.client_address[:2]
        return socket.getfqdn(host)
Guido van Rossum's avatar
Guido van Rossum committed
473 474 475 476

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # The Message-like class used to parse headers; parse_request()
    # instantiates it as MessageClass(self.rfile, 0).
    MessageClass = mimetools.Message

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # The short message is the default text of the status line written by
    # send_response(); the long message is the explanation inserted into
    # the page produced by send_error().
    # See http://www.w3.org/hypertext/WWW/Protocols/HTTP/HTRESP.html
    responses = {
        # 1xx codes
        100: ('Continue', 'Request received, please continue'),
        101: ('Switching Protocols',
              'Switching to new protocol; obey Upgrade header'),

        # 2xx codes
        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
        204: ('No response', 'Request fulfilled, nothing follows'),
        205: ('Reset Content', 'Clear input form for further input.'),
        206: ('Partial Content', 'Partial content follows.'),

        # 3xx codes
        300: ('Multiple Choices',
              'Object has several resources -- see URI list'),
        301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('See Other', 'Object moved -- see Method and URL list'),
        304: ('Not modified',
              'Document has not changed since given time'),
        305: ('Use Proxy',
              'You must use proxy specified in Location to access this '
              'resource.'),
        307: ('Temporary Redirect',
              'Object moved temporarily -- see URI list'),

        # 4xx codes
        400: ('Bad request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not Found', 'Nothing matches the given URI'),
        405: ('Method Not Allowed',
              'Specified method is invalid for this server.'),
        406: ('Not Acceptable', 'URI not available in preferred format.'),
        407: ('Proxy Authentication Required', 'You must authenticate with '
              'this proxy before proceeding.'),
        408: ('Request Time-out', 'Request timed out; try again later.'),
        409: ('Conflict', 'Request conflict.'),
        410: ('Gone',
              'URI no longer exists and has been permanently removed.'),
        411: ('Length Required', 'Client must specify Content-Length.'),
        412: ('Precondition Failed', 'Precondition in headers is false.'),
        413: ('Request Entity Too Large', 'Entity is too large.'),
        414: ('Request-URI Too Long', 'URI is too long.'),
        415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
        416: ('Requested Range Not Satisfiable',
              'Cannot satisfy request range.'),
        417: ('Expectation Failed',
              'Expect condition could not be satisfied.'),

        # 5xx codes
        500: ('Internal error', 'Server got itself in trouble'),
        501: ('Not Implemented',
              'Server does not support this operation'),
        502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
        503: ('Service temporarily overloaded',
              'The server cannot process the request due to a high load'),
        504: ('Gateway timeout',
              'The gateway server did not receive a timely response'),
        505: ('HTTP Version not supported', 'Cannot fulfill request.'),
        }
Guido van Rossum's avatar
Guido van Rossum committed
551 552 553


def test(HandlerClass = BaseHTTPRequestHandler,
554
         ServerClass = HTTPServer, protocol="HTTP/1.0"):
Guido van Rossum's avatar
Guido van Rossum committed
555 556 557 558 559 560 561 562
    """Test the HTTP request handler class.

    This runs an HTTP server on port 8000 (or the first command line
    argument).

    """

    if sys.argv[1:]:
563
        port = int(sys.argv[1])
Guido van Rossum's avatar
Guido van Rossum committed
564
    else:
565
        port = 8000
Guido van Rossum's avatar
Guido van Rossum committed
566 567
    server_address = ('', port)

568
    HandlerClass.protocol_version = protocol
Guido van Rossum's avatar
Guido van Rossum committed
569 570
    httpd = ServerClass(server_address, HandlerClass)

571 572
    sa = httpd.socket.getsockname()
    print "Serving HTTP on", sa[0], "port", sa[1], "..."
Guido van Rossum's avatar
Guido van Rossum committed
573 574 575 576 577
    httpd.serve_forever()


if __name__ == '__main__':
    test()