"""HTTP server base class.

Note: the class in this module doesn't implement any HTTP request; see
SimpleHTTPServer for simple implementations of GET, HEAD and POST
(including CGI scripts).  It does, however, optionally implement HTTP/1.1
persistent connections, as of version 0.3.

Contents:

- BaseHTTPRequestHandler: HTTP request handler base class
- test: test function

XXX To do:

- log requests even later (to capture byte count)
- log user-agent header and other interesting goodies
- send error log to separate file
"""


# See also:
#
# HTTP Working Group                                        T. Berners-Lee
# INTERNET-DRAFT                                            R. T. Fielding
# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
# Expires September 8, 1995                                  March 8, 1995
#
# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
#
# and
#
# Network Working Group                                      R. Fielding
# Request for Comments: 2616                                       et al
# Obsoletes: 2068                                              June 1999
# Category: Standards Track
#
# URL: http://www.faqs.org/rfcs/rfc2616.html

# Log files
# ---------
#
# Here's a quote from the NCSA httpd docs about log file format.
#
# | The logfile format is as follows. Each line consists of:
# |
# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
# |
# |        host: Either the DNS name or the IP number of the remote client
# |        rfc931: Any information returned by identd for this person,
# |                - otherwise.
# |        authuser: If user sent a userid for authentication, the user name,
# |                  - otherwise.
# |        DD: Day
# |        Mon: Month (calendar name)
# |        YYYY: Year
# |        hh: hour (24-hour format, the machine's timezone)
# |        mm: minutes
# |        ss: seconds
# |        request: The first line of the HTTP request as sent by the client.
# |        ddd: the status code returned by the server, - if not available.
# |        bbbb: the total number of bytes sent,
# |              *not including the HTTP/1.0 header*, - if not available
# |
# | You can determine the name of the file accessed through request.
#
# (Actually, the latter is only true if you know the server configuration
# at the time the request was made!)

__version__ = "0.3"

__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]

import sys
import time
import socket # For gethostbyaddr()
import mimetools
import SocketServer

# Default error message template.  It is filled in with the % operator
# from a mapping that supplies the keys 'code', 'message' and 'explain';
# BaseHTTPRequestHandler.send_error() does this through the
# error_message_format class attribute, which defaults to this string.
DEFAULT_ERROR_MESSAGE = """\
<head>
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
"""


class HTTPServer(SocketServer.TCPServer):

    """TCP server that remembers the name and port it was bound to."""

    # Flag inherited from SocketServer.TCPServer; switched on here because
    # it seems to make sense in a testing environment.
    allow_reuse_address = 1

    def server_bind(self):
        """Bind as the base class does, then record server_name/server_port.

        server_name is the result of socket.getfqdn() applied to the host
        part of the bound socket address; server_port is the port part.
        """
        SocketServer.TCPServer.server_bind(self)
        # Only the first two fields of the socket name are used.
        bound_host, bound_port = self.socket.getsockname()[:2]
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port


class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and request_version are the broken-down request
    line;

    - headers is an instance of mimetools.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.  self.requestline (the request line without its
        line terminator) and self.close_connection are set as well.

        Return True for success, False for failure.  On failure an
        error is sent back, except when the request line contains no
        words at all, in which case False is returned silently.

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = "HTTP/0.9" # Default
        self.close_connection = 1
        requestline = self.raw_requestline
        # Accept both CRLF and bare LF as the line terminator.
        if requestline[-2:] == '\r\n':
            requestline = requestline[:-2]
        elif requestline[-1:] == '\n':
            requestline = requestline[:-1]
        self.requestline = requestline
        words = requestline.split()
        if len(words) == 3:
            [command, path, version] = words
            if version[:5] != 'HTTP/':
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            try:
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            # Persistent connections need both sides to speak HTTP/1.1.
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = 0
            if version_number >= (2, 0):
                self.send_error(505,
                          "Invalid HTTP Version (%s)" % base_version_number)
                return False
        elif len(words) == 2:
            # No version given: treated as an HTTP/0.9 request, for which
            # only GET is accepted.
            [command, path] = words
            self.close_connection = 1
            if command != 'GET':
                self.send_error(400,
                                "Bad HTTP/0.9 request type (%r)" % command)
                return False
        elif not words:
            return False
        else:
            self.send_error(400, "Bad request syntax (%r)" % requestline)
            return False
        self.command, self.path, self.request_version = command, path, version

        # Examine the headers and look for a Connection directive
        self.headers = self.MessageClass(self.rfile, 0)

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = 1
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = 0
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        Reads one request line from self.rfile, parses it and dispatches
        to the do_<command>() method; a 501 error is sent when no such
        method exists.  An empty read marks the connection for closing.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        self.raw_requestline = self.rfile.readline()
        if not self.raw_requestline:
            self.close_connection = 1
            return
        if not self.parse_request(): # An error code has been sent, just exit
            return
        mname = 'do_' + self.command
        if not hasattr(self, mname):
            self.send_error(501, "Unsupported method (%r)" % self.command)
            return
        method = getattr(self, mname)
        method()

    def handle(self):
        """Handle multiple requests if necessary.

        At least one request is handled; further requests are handled
        for as long as self.close_connection stays false.
        """
        self.close_connection = 1

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry matching the
        response code; codes missing from self.responses use '???'.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.  A
        'Connection: close' header is always included.  The HTML body is
        omitted for HEAD requests and for codes below 200, 204 and 304.

        The message is HTML-escaped (&, <, >) before it is placed in
        the HTML body, because it commonly echoes text received from
        the client.  The log entry and the response line receive the
        message unchanged.

        """

        try:
            short_msg, long_msg = self.responses[code]
        except KeyError:
            short_msg, long_msg = '???', '???'
        if message is None:
            message = short_msg
        explain = long_msg
        self.log_error("code %d, message %s", code, message)
        # "%s" formatting mirrors how the template itself would have
        # rendered the message, whatever its type.  '&' must be replaced
        # first so the entities produced afterwards are not re-escaped.
        escaped = "%s" % (message,)
        for raw, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
            escaped = escaped.replace(raw, entity)
        content = (self.error_message_format %
                   {'code': code, 'message': escaped, 'explain': explain})
        self.send_response(code, message)
        self.send_header("Content-Type", "text/html")
        self.send_header('Connection', 'close')
        self.end_headers()
        if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
            self.wfile.write(content)

    # Template used by send_error() to build the HTML error page.
    error_message_format = DEFAULT_ERROR_MESSAGE

    def send_response(self, code, message=None):
        """Send the response header and log the response code.

        The message defaults to the short entry for the code in
        self.responses, or to an empty string for unknown codes.  The
        response line is not written for HTTP/0.9 requests.

        Also send two standard headers with the server software
        version and the current date.

        """
        self.log_request(code)
        if message is None:
            if code in self.responses:
                message = self.responses[code][0]
            else:
                message = ''
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s %d %s\r\n" %
                             (self.protocol_version, code, message))
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_header(self, keyword, value):
        """Send a MIME header.

        Nothing is written for HTTP/0.9 requests.  A Connection header
        with the value 'close' or 'keep-alive' (case-insensitive) also
        updates self.close_connection accordingly.
        """
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s: %s\r\n" % (keyword, value))

        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = 1
            elif value.lower() == 'keep-alive':
                self.close_connection = 0

    def end_headers(self):
        """Send the blank line ending the MIME headers.

        Nothing is written for HTTP/0.9 requests.
        """
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("\r\n")

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """

        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(*args)

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client host and current date/time are prefixed to
        every message, which is written to sys.stderr.

        """

        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          format%args))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return a date and time formatted for a message header.

        timestamp is a number of seconds since the epoch, as returned
        by time.time(); when omitted or None the current time is used.
        The result is expressed in GMT.

        """
        if timestamp is None:
            timestamp = time.time()
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
                self.weekdayname[wd],
                day, self.monthname[month], year,
                hh, mm, ss)
        return s

    def log_date_time_string(self):
        """Return the current time formatted for logging.

        The local time zone is used.
        """
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    # Indexed by the weekday field of a time tuple (Monday first).
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Indexed by the month field of a time tuple; the leading None pads
    # index 0 because months are numbered from 1.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address formatted for logging.

        This version passes the host part of self.client_address to
        socket.getfqdn() and returns the result.

        """

        host, port = self.client_address[:2]
        return socket.getfqdn(host)

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # The Message-like class used to parse headers
    MessageClass = mimetools.Message

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See http://www.w3.org/hypertext/WWW/Protocols/HTTP/HTRESP.html
    responses = {
        100: ('Continue', 'Request received, please continue'),
        101: ('Switching Protocols',
              'Switching to new protocol; obey Upgrade header'),

        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
        204: ('No response', 'Request fulfilled, nothing follows'),
        205: ('Reset Content', 'Clear input form for further input.'),
        206: ('Partial Content', 'Partial content follows.'),

        300: ('Multiple Choices',
              'Object has several resources -- see URI list'),
        301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('See Other', 'Object moved -- see Method and URL list'),
        304: ('Not modified',
              'Document has not changed since given time'),
        305: ('Use Proxy',
              'You must use proxy specified in Location to access this '
              'resource.'),
        307: ('Temporary Redirect',
              'Object moved temporarily -- see URI list'),

        400: ('Bad request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not Found', 'Nothing matches the given URI'),
        405: ('Method Not Allowed',
              'Specified method is invalid for this server.'),
        406: ('Not Acceptable', 'URI not available in preferred format.'),
        407: ('Proxy Authentication Required', 'You must authenticate with '
              'this proxy before proceeding.'),
        408: ('Request Time-out', 'Request timed out; try again later.'),
        409: ('Conflict', 'Request conflict.'),
        410: ('Gone',
              'URI no longer exists and has been permanently removed.'),
        411: ('Length Required', 'Client must specify Content-Length.'),
        412: ('Precondition Failed', 'Precondition in headers is false.'),
        413: ('Request Entity Too Large', 'Entity is too large.'),
        414: ('Request-URI Too Long', 'URI is too long.'),
        415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
        416: ('Requested Range Not Satisfiable',
              'Cannot satisfy request range.'),
        417: ('Expectation Failed',
              'Expect condition could not be satisfied.'),

        500: ('Internal error', 'Server got itself in trouble'),
        501: ('Not Implemented',
              'Server does not support this operation'),
        502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
        503: ('Service temporarily overloaded',
              'The server cannot process the request due to a high load'),
        504: ('Gateway timeout',
              'The gateway server did not receive a timely response'),
        505: ('HTTP Version not supported', 'Cannot fulfill request.'),
        }


def test(HandlerClass = BaseHTTPRequestHandler,
551
         ServerClass = HTTPServer, protocol="HTTP/1.0"):
Guido van Rossum's avatar
Guido van Rossum committed
552 553 554 555 556 557 558 559
    """Test the HTTP request handler class.

    This runs an HTTP server on port 8000 (or the first command line
    argument).

    """

    if sys.argv[1:]:
560
        port = int(sys.argv[1])
Guido van Rossum's avatar
Guido van Rossum committed
561
    else:
562
        port = 8000
Guido van Rossum's avatar
Guido van Rossum committed
563 564
    server_address = ('', port)

565
    HandlerClass.protocol_version = protocol
Guido van Rossum's avatar
Guido van Rossum committed
566 567
    httpd = ServerClass(server_address, HandlerClass)

568 569
    sa = httpd.socket.getsockname()
    print "Serving HTTP on", sa[0], "port", sa[1], "..."
Guido van Rossum's avatar
Guido van Rossum committed
570 571 572 573 574
    httpd.serve_forever()


# When executed as a script (rather than imported), run test().
if __name__ == '__main__':
    test()