httplib.py 42.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
"""HTTP/1.1 client library

<intro stuff goes here>
<other stuff, too>

HTTPConnection objects go through a number of "states", which define when a
client may legally make another request or fetch the response for a
particular request. This diagram details these state transitions:

    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent

This diagram presents the following rules:
  -- a second request may not be started until {response-headers-read}
  -- a response [object] cannot be retrieved until {request-sent}
  -- there is no differentiation between an unread response body and a
     partially read response body

Note: this enforcement is applied by the HTTPConnection class. The
      HTTPResponse class does not enforce this state machine, which
      implies sophisticated clients may accelerate the request/response
      pipeline. Caution should be taken, though: accelerating the states
      beyond the above pattern may imply knowledge of the server's
      connection-close behavior for certain requests. For example, it
      is impossible to tell whether the server will close the connection
      UNTIL the response headers have been read; this means that further
      requests cannot be placed into the pipeline until it is known that
      the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
67
"""
68

Jeremy Hylton's avatar
Jeremy Hylton committed
69
import errno
70
import mimetools
Jeremy Hylton's avatar
Jeremy Hylton committed
71
import socket
72
from urlparse import urlsplit
73

74
try:
75
    from cStringIO import StringIO
76
except ImportError:
77
    from StringIO import StringIO
78

79 80
__all__ = ["HTTP", "HTTPResponse", "HTTPConnection", "HTTPSConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
81 82 83 84
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error"]
85

86
HTTP_PORT = 80
87 88
HTTPS_PORT = 443

89 90 91 92 93 94 95
# Placeholder stored in HTTPResponse attributes (version, status, chunked,
# will_close, ...) until the status line and headers have been parsed.
_UNKNOWN = 'UNKNOWN'

# connection states
# These are the values held by HTTPConnection's private __state attribute;
# see the "Logical State" table in the module docstring.
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
class HTTPMessage(mimetools.Message):

    def addheader(self, key, value):
        """Add header for field key handling repeats."""
        prev = self.dict.get(key)
        if prev is None:
            self.dict[key] = value
        else:
            combined = ", ".join((prev, value))
            self.dict[key] = combined

    def addcontinue(self, key, more):
        """Add more field data from a continuation line."""
        prev = self.dict[key]
        self.dict[key] = prev + "\n " + more

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that terminates them.
        The (normally blank) line that ends the headers is skipped, but not
        included in the returned list.  If a non-header line ends the headers,
        (which is an error), an attempt is made to backspace over it; it is
        never included in the returned list.

        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message.  The variable self.headers is a
        completely uninterpreted list of lines contained in the header (so
        printing them will reproduce the header exactly as it appears in the
        file).

        If multiple header fields with the same name occur, they are combined
        according to the rules in RFC 2616 sec 4.2:

        Appending each subsequent field-value to the first, each separated
        by a comma. The order in which header fields with the same field-name
        are received is significant to the interpretation of the combined
        field value.
        """
        # XXX The implementation overrides the readheaders() method of
        # rfc822.Message.  The base class design isn't amenable to
        # customized behavior here so the method here is a copy of the
        # base class code with a few small changes.

        self.dict = {}
        self.unixfrom = ''
Raymond Hettinger's avatar
Raymond Hettinger committed
142
        self.headers = hlist = []
143 144 145 146 147 148 149 150
        self.status = ''
        headerseen = ""
        firstline = 1
        startofline = unread = tell = None
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
Raymond Hettinger's avatar
Raymond Hettinger committed
151
        while True:
152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
            if tell:
                try:
                    startofline = tell()
                except IOError:
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # XXX Not sure if continuation lines are handled properly
                # for http and/or for repeating headers
                # It's a continuation line.
Raymond Hettinger's avatar
Raymond Hettinger committed
171
                hlist.append(line)
172 173 174 175 176 177 178 179 180 181 182
                self.addcontinue(headerseen, line.strip())
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
Raymond Hettinger's avatar
Raymond Hettinger committed
183
                hlist.append(line)
184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199
                self.addheader(headerseen, line[len(headerseen)+1:].strip())
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break
200 201

class HTTPResponse:
202 203 204

    # strict: If true, raise BadStatusLine if the status line can't be
    # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
Skip Montanaro's avatar
Skip Montanaro committed
205
    # false because it prevents clients from talking to HTTP/0.9
206 207 208 209 210
    # servers.  Note that a response with a sufficiently corrupted
    # status line will look like an HTTP/0.9 response.

    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.

211
    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        """Wrap sock for reading one response.

        sock       -- object providing makefile(); opened 'rb', unbuffered
        debuglevel -- values > 0 make the parser print what it reads
        strict     -- if true, an unparseable status line raises
                      BadStatusLine instead of being treated as HTTP/0.9
        method     -- request method that produced this response
        """
        self.fp = sock.makefile('rb', 0)
        self.debuglevel = debuglevel
        self.strict = strict
        self._method = method

        # parsed headers; stays None until begin() has run
        self.msg = None

        # from the Status-Line of the response:
        # HTTP-Version, Status-Code and Reason-Phrase
        self.version = self.status = self.reason = _UNKNOWN

        # body-reading state, all filled in by begin()
        self.chunked = _UNKNOWN         # is "chunked" being used?
        self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
        self.length = _UNKNOWN          # number of bytes left in response
        self.will_close = _UNKNOWN      # conn will close at end of response
228

229
    def _read_status(self):
230
        # Initialize with Simple-Response defaults
231
        line = self.fp.readline()
232 233
        if self.debuglevel > 0:
            print "reply:", repr(line)
234 235 236 237
        if not line:
            # Presumably, the server closed the connection before
            # sending a valid response.
            raise BadStatusLine(line)
238
        try:
239
            [version, status, reason] = line.split(None, 2)
240 241
        except ValueError:
            try:
242
                [version, status] = line.split(None, 1)
243 244
                reason = ""
            except ValueError:
245 246 247 248 249 250 251 252 253 254 255
                # empty version will cause next test to fail and status
                # will be treated as 0.9 response.
                version = ""
        if not version.startswith('HTTP/'):
            if self.strict:
                self.close()
                raise BadStatusLine(line)
            else:
                # assume it's a Simple-Response from an 0.9 server
                self.fp = LineAndFileWrapper(line, self.fp)
                return "HTTP/0.9", 200, ""
256

257 258
        # The status code is a three-digit number
        try:
259
            status = int(status)
260 261 262 263
            if status < 100 or status > 999:
                raise BadStatusLine(line)
        except ValueError:
            raise BadStatusLine(line)
264 265
        return version, status, reason

266
    def begin(self):
267 268 269
        if self.msg is not None:
            # we've already started reading the response
            return
270

271
        # read until we get a non-100 response
Raymond Hettinger's avatar
Raymond Hettinger committed
272
        while True:
273 274 275 276
            version, status, reason = self._read_status()
            if status != 100:
                break
            # skip the header from the 100 response
Raymond Hettinger's avatar
Raymond Hettinger committed
277
            while True:
278 279 280 281 282
                skip = self.fp.readline().strip()
                if not skip:
                    break
                if self.debuglevel > 0:
                    print "header:", skip
Tim Peters's avatar
Tim Peters committed
283

284 285
        self.status = status
        self.reason = reason.strip()
286 287
        if version == 'HTTP/1.0':
            self.version = 10
288
        elif version.startswith('HTTP/1.'):
Tim Peters's avatar
Tim Peters committed
289
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
290 291
        elif version == 'HTTP/0.9':
            self.version = 9
292 293
        else:
            raise UnknownProtocol(version)
294

295
        if self.version == 9:
296
            self.chunked = 0
297
            self.will_close = 1
298
            self.msg = HTTPMessage(StringIO())
299 300
            return

301
        self.msg = HTTPMessage(self.fp, 0)
302 303 304
        if self.debuglevel > 0:
            for hdr in self.msg.headers:
                print "header:", hdr,
305 306 307 308 309 310

        # don't let the msg keep an fp
        self.msg.fp = None

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.msg.getheader('transfer-encoding')
311
        if tr_enc and tr_enc.lower() == "chunked":
312 313 314 315 316 317
            self.chunked = 1
            self.chunk_left = None
        else:
            self.chunked = 0

        # will the connection close at the end of the response?
318
        self.will_close = self._check_close()
319 320 321 322 323

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        length = self.msg.getheader('content-length')
        if length and not self.chunked:
324 325 326 327
            try:
                self.length = int(length)
            except ValueError:
                self.length = None
328 329 330 331
        else:
            self.length = None

        # does the body have a fixed length? (of zero)
Tim Peters's avatar
Tim Peters committed
332 333
        if (status == 204 or            # No Content
            status == 304 or            # Not Modified
334 335
            100 <= status < 200 or      # 1xx codes
            self._method == 'HEAD'):
336 337 338 339 340 341 342 343 344 345
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if not self.will_close and \
           not self.chunked and \
           self.length is None:
            self.will_close = 1

346 347 348 349 350
    def _check_close(self):
        if self.version == 11:
            # An HTTP/1.1 proxy is assumed to stay open unless
            # explicitly closed.
            conn = self.msg.getheader('connection')
351
            if conn and "close" in conn.lower():
352 353 354 355 356 357 358 359 360
                return True
            return False

        # An HTTP/1.0 response with a Connection header is probably
        # the result of a confused proxy.  Ignore it.

        # For older HTTP, Keep-Alive indiciates persistent connection.
        if self.msg.getheader('keep-alive'):
            return False
Tim Peters's avatar
Tim Peters committed
361

362 363
        # Proxy-Connection is a netscape hack.
        pconn = self.msg.getheader('proxy-connection')
364
        if pconn and "keep-alive" in pconn.lower():
365 366 367 368 369
            return False

        # otherwise, assume it will close
        return True

370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388
    def close(self):
        if self.fp:
            self.fp.close()
            self.fp = None

    def isclosed(self):
        # NOTE: it is possible that we will not ever call self.close(). This
        #       case occurs when will_close is TRUE, length is None, and we
        #       read up to the last byte, but NOT past it.
        #
        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
        #          called, meaning self.isclosed() is meaningful.
        return self.fp is None

    def read(self, amt=None):
        if self.fp is None:
            return ''

        if self.chunked:
389
            return self._read_chunked(amt)
Tim Peters's avatar
Tim Peters committed
390

391
        if amt is None:
392 393 394 395 396
            # unbounded read
            if self.will_close:
                s = self.fp.read()
            else:
                s = self._safe_read(self.length)
Tim Peters's avatar
Tim Peters committed
397
            self.close()        # we read everything
398 399 400 401 402 403
            return s

        if self.length is not None:
            if amt > self.length:
                # clip the read to the "end of response"
                amt = self.length
404
            self.length -= amt
405 406 407 408 409 410 411 412

        # we do not use _safe_read() here because this may be a .will_close
        # connection, and the user is reading more bytes than will be provided
        # (for example, reading in 1k chunks)
        s = self.fp.read(amt)

        return s

413 414 415 416 417 418 419
    def _read_chunked(self, amt):
        """Read body data sent with the "chunked" transfer-coding.

        With amt None, every remaining chunk is read, the trailer is
        discarded and the response is closed.  Otherwise at most amt bytes
        are returned and self.chunk_left records how much of the current
        chunk is still unread.

        _safe_read() raises IncompleteRead if a chunk is cut short.
        """
        assert self.chunked != _UNKNOWN
        chunk_left = self.chunk_left
        # Collect the pieces in a list and join once at the end; repeated
        # string concatenation gets slow as the number of chunks grows.
        pieces = []

        while True:
            if chunk_left is None:
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i] # strip chunk-extensions
                chunk_left = int(line, 16)
                if chunk_left == 0:
                    break
            if amt is None:
                pieces.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                pieces.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return ''.join(pieces)
            elif amt == chunk_left:
                pieces.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return ''.join(pieces)
            else:
                pieces.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # EOF before the terminating CRLF: readline() would keep
                # returning '' forever, so stop here.
                break
            if line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return ''.join(pieces)
Tim Peters's avatar
Tim Peters committed
459

460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478
    def _safe_read(self, amt):
        """Read the number of bytes requested, compensating for partial reads.

        Normally, we have a blocking socket, but a read() can be interrupted
        by a signal (resulting in a partial read).

        Note that we cannot distinguish between EOF and an interrupt when zero
        bytes have been read. IncompleteRead() will be raised in this
        situation.

        This function should be used when <amt> bytes "should" be present for
        reading. If the bytes are truly not available (due to EOF), then the
        IncompleteRead exception can be used to detect the problem.
        """
        s = ''
        while amt > 0:
            chunk = self.fp.read(amt)
            if not chunk:
                raise IncompleteRead(s)
Raymond Hettinger's avatar
Raymond Hettinger committed
479 480
            s += chunk
            amt -= len(chunk)
481 482 483 484 485 486
        return s

    def getheader(self, name, default=None):
        if self.msg is None:
            raise ResponseNotReady()
        return self.msg.getheader(name, default)
487 488 489 490


class HTTPConnection:

491 492 493 494 495 496
    # Protocol version this class speaks: putrequest() compares _http_vsn
    # with 11 and places _http_vsn_str on the request line.
    _http_vsn = 11
    _http_vsn_str = 'HTTP/1.1'

    # Class instantiated by getresponse() to read the reply.
    response_class = HTTPResponse
    # Port used by _set_hostport() when none is given.
    default_port = HTTP_PORT
    # When true, send() connects on demand and request() retries once
    # after a broken pipe.
    auto_open = 1
497
    debuglevel = 0
498
    strict = 0
499

500
    def __init__(self, host, port=None, strict=None):
        """Set up an unconnected, idle connection object.

        host and port are handed to _set_hostport().  strict, when not
        None, overrides the class-level default on this instance.  No
        socket is opened here.
        """
        # request/response state machine starts out idle
        self.__state = _CS_IDLE
        self.__response = None
        self._method = None

        self.sock = None
        self._buffer = []

        self._set_hostport(host, port)
        if strict is not None:
            self.strict = strict
510 511 512

    def _set_hostport(self, host, port):
        if port is None:
513
            i = host.find(':')
514
            if i >= 0:
515 516 517
                try:
                    port = int(host[i+1:])
                except ValueError:
518
                    raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
519 520 521 522 523 524
                host = host[:i]
            else:
                port = self.default_port
        self.host = host
        self.port = port

525 526 527
    def set_debuglevel(self, level):
        self.debuglevel = level

528 529
    def connect(self):
        """Connect to the host and port specified in __init__."""
530
        msg = "getaddrinfo returns an empty list"
531 532
        for res in socket.getaddrinfo(self.host, self.port, 0,
                                      socket.SOCK_STREAM):
533 534 535 536 537 538 539 540 541
            af, socktype, proto, canonname, sa = res
            try:
                self.sock = socket.socket(af, socktype, proto)
                if self.debuglevel > 0:
                    print "connect: (%s, %s)" % (self.host, self.port)
                self.sock.connect(sa)
            except socket.error, msg:
                if self.debuglevel > 0:
                    print 'connect fail:', (self.host, self.port)
542 543
                if self.sock:
                    self.sock.close()
544 545 546 547 548
                self.sock = None
                continue
            break
        if not self.sock:
            raise socket.error, msg
549 550 551 552

    def close(self):
        """Close the connection to the HTTP server.

        Also closes and forgets any response still being tracked, and
        returns the connection to the idle state.
        """
        sock = self.sock
        if sock:
            sock.close()   # close it manually... there may be other refs
            self.sock = None
        pending = self.__response
        if pending:
            pending.close()
            self.__response = None
        self.__state = _CS_IDLE

    def send(self, str):
        """Send `str' to the server."""
        if self.sock is None:
            if self.auto_open:
                self.connect()
            else:
                raise NotConnected()

568
        # send the data to the server. if we get a broken pipe, then close
569 570 571 572
        # the socket. we want to reconnect when somebody tries to send again.
        #
        # NOTE: we DO propagate the error, though, because we cannot simply
        #       ignore the error... the caller will know if they can retry.
573 574
        if self.debuglevel > 0:
            print "send:", repr(str)
575
        try:
576
            self.sock.sendall(str)
577
        except socket.error, v:
Tim Peters's avatar
Tim Peters committed
578
            if v[0] == 32:      # Broken pipe
579 580 581
                self.close()
            raise

582 583
    def _output(self, s):
        """Add a line of output to the current request buffer.
Tim Peters's avatar
Tim Peters committed
584

585
        Assumes that the line does *not* end with \\r\\n.
586 587 588 589 590 591
        """
        self._buffer.append(s)

    def _send_output(self):
        """Send the currently buffered request and clear the buffer.

592
        Appends an extra \\r\\n to the buffer.
593 594 595 596 597 598
        """
        self._buffer.extend(("", ""))
        msg = "\r\n".join(self._buffer)
        del self._buffer[:]
        self.send(msg)

599
    def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
        """Send a request to the server.

        `method' specifies an HTTP request method, e.g. 'GET'.
        `url' specifies the object being requested, e.g. '/index.html'.
        `skip_host' if True does not add automatically a 'Host:' header
        `skip_accept_encoding' if True does not add automatically an
           'Accept-Encoding:' header

        Raises CannotSendRequest unless the connection is idle.
        """

        # if a prior response has been completed, then forget about it.
        prior = self.__response
        if prior and prior.isclosed():
            self.__response = None

        # in certain cases, we cannot issue another request on this connection.
        # this occurs when:
        #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
        #   2) a response to a previous request has signalled that it is going
        #      to close the connection upon completion.
        #   3) the headers for the previous response have not been read, thus
        #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
        #
        # if there is no prior response, then we can request at will.
        #
        # if point (2) is true, then we will have passed the socket to the
        # response (effectively meaning, "there is no prior response"), and
        # will open a new one when a new request is made.
        #
        # Note: if a prior response exists, then we *can* start a new request.
        #       We are not allowed to begin fetching the response to this new
        #       request, however, until that prior response is complete.
        #
        if self.__state != _CS_IDLE:
            raise CannotSendRequest()
        self.__state = _CS_REQ_STARTED

        # Save the method we use, we need it later in the response phase
        self._method = method
        if not url:
            url = '/'
        self._output('%s %s %s' % (method, url, self._http_vsn_str))

        if self._http_vsn != 11:
            # For HTTP/1.0, the server will assume "not chunked"
            return

        # Issue some standard headers for better HTTP/1.1 compliance

        if not skip_host:
            # this header is issued *only* for HTTP/1.1
            # connections. more specifically, this means it is
            # only issued when the client uses the new
            # HTTPConnection() class. backwards-compat clients
            # will be using HTTP/1.0 and those clients may be
            # issuing this header themselves. we should NOT issue
            # it twice; some web servers (such as Apache) barf
            # when they see two Host: headers

            # If we need a non-standard port, include it in the
            # header.  If the request is going through a proxy,
            # put the host of the actual URL, not the host of the
            # proxy.

            netloc = ''
            if url.startswith('http'):
                netloc = urlsplit(url)[1]

            if netloc:
                host_value = netloc.encode("idna")
            elif self.port == HTTP_PORT:
                host_value = self.host.encode("idna")
            else:
                host_value = "%s:%s" % (self.host.encode("idna"), self.port)
            self.putheader('Host', host_value)

        # note: we are assuming that clients will not attempt to set these
        #       headers since *this* library must deal with the
        #       consequences. this also means that when the supporting
        #       libraries are updated to recognize other forms, then this
        #       code should be changed (removed or updated).

        # we only want a Content-Encoding of "identity" since we don't
        # support encodings such as x-gzip or x-deflate.
        if not skip_accept_encoding:
            self.putheader('Accept-Encoding', 'identity')

        # we can accept "chunked" Transfer-Encodings, but no others
        # NOTE: no TE header implies *only* "chunked", so no
        #       'TE: chunked' header is sent.  If a TE header were
        #       supplied it would also have to be named in a
        #       'Connection: TE' header.

    def putheader(self, header, value):
        """Send a request header line to the server.

        For example: h.putheader('Accept', 'text/html')

        Raises CannotSendHeader unless a request has been started with
        putrequest() and its headers not yet ended.
        """
        if self.__state != _CS_REQ_STARTED:
            raise CannotSendHeader()

        self._output('%s: %s' % (header, value))
707 708 709 710 711 712 713 714 715

    def endheaders(self):
        """Indicate that the last header line has been sent to the server.

        Moves the connection to the request-sent state and transmits the
        buffered request.  Raises CannotSendHeader if no request is in
        progress.
        """
        if self.__state != _CS_REQ_STARTED:
            raise CannotSendHeader()
        self.__state = _CS_REQ_SENT

        self._send_output()
717 718 719 720 721 722 723 724 725 726 727 728 729 730

    def request(self, method, url, body=None, headers={}):
        """Send a complete request to the server.

        method and url are passed to putrequest(); body, if given, is sent
        after the headers; headers is a mapping of extra header lines.
        The headers mapping is only read, never modified.

        If the first attempt fails with a broken pipe and auto_open is
        true, the request is sent one more time; any other socket.error
        is propagated.
        """

        try:
            self._send_request(method, url, body, headers)
        except socket.error, v:
            # trap 'Broken pipe' if we're allowed to automatically reconnect
            # (errno.EPIPE instead of a bare 32, matching send())
            if v[0] != errno.EPIPE or not self.auto_open:
                raise
            # try one more time
            self._send_request(method, url, body, headers)

    def _send_request(self, method, url, body, headers):
Jeremy Hylton's avatar
Jeremy Hylton committed
731 732 733
        # If headers already contains a host header, then define the
        # optional skip_host argument to putrequest().  The check is
        # harder because field names are case insensitive.
Tim Peters's avatar
Tim Peters committed
734
        if 'host' in [k.lower() for k in headers]:
Jeremy Hylton's avatar
Jeremy Hylton committed
735 736 737
            self.putrequest(method, url, skip_host=1)
        else:
            self.putrequest(method, url)
738 739 740

        if body:
            self.putheader('Content-Length', str(len(body)))
Raymond Hettinger's avatar
Raymond Hettinger committed
741
        for hdr, value in headers.iteritems():
742 743 744 745 746 747 748 749 750
            self.putheader(hdr, value)
        self.endheaders()

        if body:
            self.send(body)

    def getresponse(self):
        """Get the response from the server.

        Returns a response_class instance whose headers have been read.
        Raises ResponseNotReady unless a request has been fully sent and
        no earlier response is still unfinished.
        """

        # if a prior response has been completed, then forget about it.
        prior = self.__response
        if prior and prior.isclosed():
            self.__response = None

        #
        # if a prior response exists, then it must be completed (otherwise, we
        # cannot read this response's header to determine the connection-close
        # behavior)
        #
        # note: if a prior response existed, but was connection-close, then the
        # socket and response were made independent of this HTTPConnection
        # object since a new request requires that we open a whole new
        # connection
        #
        # this means the prior response had one of two states:
        #   1) will_close: this connection was reset and the prior socket and
        #                  response operate independently
        #   2) persistent: the response was retained and we await its
        #                  isclosed() status to become true.
        #
        if self.__state != _CS_REQ_SENT or self.__response:
            raise ResponseNotReady()

        if self.debuglevel > 0:
            resp = self.response_class(self.sock, self.debuglevel,
                                       strict=self.strict,
                                       method=self._method)
        else:
            resp = self.response_class(self.sock, strict=self.strict,
                                       method=self._method)

        resp.begin()
        assert resp.will_close != _UNKNOWN
        self.__state = _CS_IDLE

        if resp.will_close:
            # this effectively passes the connection to the response
            self.close()
        else:
            # remember this, so we can tell when it is complete
            self.__response = resp

        return resp
794

Jeremy Hylton's avatar
Jeremy Hylton committed
795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841
# The next several classes are used to define FakeSocket, a socket-like
# interface to an SSL connection.

# The primary complexity comes from faking a makefile() method.  The
# standard socket makefile() implementation calls dup() on the socket
# file descriptor.  As a consequence, clients can call close() on the
# parent socket and its makefile children in any order.  The underlying
# socket isn't closed until they are all closed.

# The implementation uses reference counting to keep the socket open
# until the last client calls close().  SharedSocket keeps track of
# the reference counting and SharedSocketClient provides a constructor
# and close() method that call incref() and decref() correctly.

class SharedSocket:
    """Reference-counted holder for a socket shared by several clients.

    The wrapped socket is available as the ``sock`` attribute.  Each
    client announces itself with incref() and signs off with decref();
    the socket is closed when the count drops back to zero.
    """

    def __init__(self, sock):
        """Wrap sock; no client holds a reference yet."""
        self._refcnt = 0
        self.sock = sock

    def incref(self):
        """Record one more client using the socket."""
        self._refcnt = self._refcnt + 1

    def decref(self):
        """Drop one client; close the socket when none are left."""
        remaining = self._refcnt - 1
        self._refcnt = remaining
        # more decref() than incref() calls is a programming error
        assert remaining >= 0
        if remaining:
            return
        self.sock.close()

    def __del__(self):
        # close the socket when the holder itself goes away, whatever
        # the current count is
        self.sock.close()

class SharedSocketClient:
    """Base class for objects that hold one reference on a shared socket.

    The constructor takes the reference (incref) and close() gives it
    back (decref) exactly once, however many times close() is called.
    """

    def __init__(self, shared):
        """Take a reference on shared and remember its raw socket.

        shared -- object providing incref(), decref() and a ``sock``
                  attribute
        """
        self._closed = 0
        self._shared = shared
        shared.incref()
        self._sock = shared.sock

    def close(self):
        """Release our reference; later calls do nothing."""
        if self._closed:
            return
        holder = self._shared
        holder.decref()
        self._closed = 1
        # forget the shared object so it cannot be released twice
        self._shared = None

class SSLFile(SharedSocketClient):
842
    """File-like object wrapping an SSL socket."""
843

844
    BUFSIZE = 8192
Tim Peters's avatar
Tim Peters committed
845

846
    def __init__(self, sock, ssl, bufsize=None):
Jeremy Hylton's avatar
Jeremy Hylton committed
847
        SharedSocketClient.__init__(self, sock)
848 849 850
        self._ssl = ssl
        self._buf = ''
        self._bufsize = bufsize or self.__class__.BUFSIZE
851

852 853 854
    def _read(self):
        buf = ''
        # put in a loop so that we retry on transient errors
Raymond Hettinger's avatar
Raymond Hettinger committed
855
        while True:
856
            try:
857
                buf = self._ssl.read(self._bufsize)
Jeremy Hylton's avatar
Jeremy Hylton committed
858 859
            except socket.sslerror, err:
                if (err[0] == socket.SSL_ERROR_WANT_READ
860
                    or err[0] == socket.SSL_ERROR_WANT_WRITE):
Jeremy Hylton's avatar
Jeremy Hylton committed
861
                    continue
862 863
                if (err[0] == socket.SSL_ERROR_ZERO_RETURN
                    or err[0] == socket.SSL_ERROR_EOF):
Jeremy Hylton's avatar
Jeremy Hylton committed
864 865 866
                    break
                raise
            except socket.error, err:
867
                if err[0] == errno.EINTR:
Jeremy Hylton's avatar
Jeremy Hylton committed
868
                    continue
869 870 871
                if err[0] == errno.EBADF:
                    # XXX socket was closed?
                    break
Jeremy Hylton's avatar
Jeremy Hylton committed
872
                raise
873 874 875 876 877 878
            else:
                break
        return buf

    def read(self, size=None):
        L = [self._buf]
879 880 881 882 883 884 885 886
        avail = len(self._buf)
        while size is None or avail < size:
            s = self._read()
            if s == '':
                break
            L.append(s)
            avail += len(s)
        all = "".join(L)
887 888
        if size is None:
            self._buf = ''
889
            return all
890
        else:
891 892
            self._buf = all[size:]
            return all[:size]
893 894 895 896

    def readline(self):
        L = [self._buf]
        self._buf = ''
897 898 899
        while 1:
            i = L[-1].find("\n")
            if i >= 0:
Raymond Hettinger's avatar
Raymond Hettinger committed
900
                break
901 902 903 904 905 906 907 908 909 910 911 912 913 914
            s = self._read()
            if s == '':
                break
            L.append(s)
        if i == -1:
            # loop exited because there is no more data
            return "".join(L)
        else:
            all = "".join(L)
            # XXX could do enough bookkeeping not to do a 2nd search
            i = all.find("\n") + 1
            line = all[:i]
            self._buf = all[i:]
            return line
915

916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940
    def readlines(self, sizehint=0):
        total = 0
        list = []
        while True:
            line = self.readline()
            if not line:
                break
            list.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return list

    def fileno(self):
        return self._sock.fileno()

    def __iter__(self):
        return self

    def next(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line

Jeremy Hylton's avatar
Jeremy Hylton committed
941 942 943 944 945
class FakeSocket(SharedSocketClient):
    """Socket-like front end for an SSL connection.

    send()/sendall()/recv() go through the SSL object; makefile()
    returns an SSLFile sharing the same underlying socket; every other
    attribute is looked up on the raw socket.
    """

    class _closedsocket:
        """Stand-in for the raw socket once the FakeSocket is closed."""

        def __getattr__(self, name):
            # NOTE(review): within this module 'error' is bound to
            # HTTPException (see the backwards-compatibility alias), not
            # socket.error -- confirm that this is the intended type.
            raise error(9, 'Bad file descriptor')

    def __init__(self, sock, ssl):
        """Wrap the raw socket in a SharedSocket and take a reference.

        sock -- the connected socket
        ssl  -- the SSL object layered on that socket
        """
        shared = SharedSocket(sock)
        SharedSocketClient.__init__(self, shared)
        self._ssl = ssl

    def close(self):
        """Release our reference and make further socket access fail."""
        SharedSocketClient.close(self)
        self._sock = self.__class__._closedsocket()

    def makefile(self, mode, bufsize=None):
        """Return an SSLFile reading from this connection.

        Only the read modes 'r' and 'rb' are supported; anything else
        raises UnimplementedFileMode.
        """
        if mode != 'r' and mode != 'rb':
            raise UnimplementedFileMode()
        return SSLFile(self._shared, self._ssl, bufsize)

    def send(self, stuff, flags=0):
        """Write stuff through the SSL object; flags is ignored."""
        return self._ssl.write(stuff)

    sendall = send

    def recv(self, len=1024, flags=0):
        """Read up to len bytes from the SSL object; flags is ignored."""
        return self._ssl.read(len)

    def __getattr__(self, attr):
        # anything not defined here is delegated to the raw socket (or to
        # the _closedsocket stand-in after close())
        return getattr(self._sock, attr)
971 972 973


class HTTPSConnection(HTTPConnection):
    "This class allows communication via SSL."

    default_port = HTTPS_PORT

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 strict=None):
        """Set up the connection and remember the key/certificate files.

        host, port, strict  -- passed on to HTTPConnection.__init__
        key_file, cert_file -- handed to socket.ssl() by connect()
        """
        HTTPConnection.__init__(self, host, port, strict)
        self.key_file = key_file
        self.cert_file = cert_file

    def connect(self):
        "Connect to a host on a given (SSL) port."

        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.connect((self.host, self.port))
            ssl = socket.ssl(sock, self.key_file, self.cert_file)
        except:
            # do not leak the descriptor when the TCP connect or the SSL
            # setup fails; the original exception is re-raised untouched
            sock.close()
            raise
        self.sock = FakeSocket(sock, ssl)
991 992


Jeremy Hylton's avatar
Jeremy Hylton committed
993
class HTTP:
994
    "Compatibility class with httplib.py from 1.5."
995

996 997
    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'
998

999
    debuglevel = 0
1000

Jeremy Hylton's avatar
Jeremy Hylton committed
1001 1002
    _connection_class = HTTPConnection

1003
    def __init__(self, host='', port=None, strict=None):
1004
        "Provide a default host, since the superclass requires one."
1005

1006 1007 1008
        # some joker passed 0 explicitly, meaning default port
        if port == 0:
            port = None
1009

1010 1011 1012
        # Note that we may pass an empty string as the host; this will throw
        # an error when we attempt to connect. Presumably, the client code
        # will call connect before then, with a proper host.
1013
        self._setup(self._connection_class(host, port, strict))
Greg Stein's avatar
Greg Stein committed
1014 1015 1016 1017

    def _setup(self, conn):
        self._conn = conn

Jeremy Hylton's avatar
Jeremy Hylton committed
1018
        # set up delegation to flesh out interface
Greg Stein's avatar
Greg Stein committed
1019 1020 1021 1022 1023 1024 1025
        self.send = conn.send
        self.putrequest = conn.putrequest
        self.endheaders = conn.endheaders
        self.set_debuglevel = conn.set_debuglevel

        conn._http_vsn = self._http_vsn
        conn._http_vsn_str = self._http_vsn_str
1026

1027
        self.file = None
1028

1029 1030
    def connect(self, host=None, port=None):
        "Accept arguments to set the host/port, since the superclass doesn't."
1031

1032
        if host is not None:
Jeremy Hylton's avatar
Jeremy Hylton committed
1033 1034
            self._conn._set_hostport(host, port)
        self._conn.connect()
1035

1036 1037 1038
    def getfile(self):
        "Provide a getfile, since the superclass' does not use this concept."
        return self.file
1039

1040 1041
    def putheader(self, header, *values):
        "The superclass allows only one value argument."
1042
        self._conn.putheader(header, '\r\n\t'.join(values))
1043

1044 1045
    def getreply(self):
        """Compat definition since superclass does not define it.
1046

1047 1048 1049 1050 1051 1052
        Returns a tuple consisting of:
        - server status code (e.g. '200' if all goes well)
        - server "reason" corresponding to status code
        - any RFC822 headers in the response from the server
        """
        try:
Jeremy Hylton's avatar
Jeremy Hylton committed
1053
            response = self._conn.getresponse()
1054 1055 1056
        except BadStatusLine, e:
            ### hmm. if getresponse() ever closes the socket on a bad request,
            ### then we are going to have problems with self.sock
1057

1058 1059
            ### should we keep this behavior? do people use it?
            # keep the socket open (as a file), and return it
Jeremy Hylton's avatar
Jeremy Hylton committed
1060
            self.file = self._conn.sock.makefile('rb', 0)
1061

1062 1063
            # close our socket -- we want to restart after any protocol error
            self.close()
1064

1065 1066
            self.headers = None
            return -1, e.line, None
1067

1068 1069 1070
        self.headers = response.msg
        self.file = response.fp
        return response.status, response.reason, response.msg
1071

1072
    def close(self):
Jeremy Hylton's avatar
Jeremy Hylton committed
1073
        self._conn.close()
1074

1075 1076 1077 1078 1079 1080
        # note that self.file == response.fp, which gets closed by the
        # superclass. just clear the object ref here.
        ### hmm. messy. if status==-1, then self.file is owned by us.
        ### well... we aren't explicitly closing, but losing this ref will
        ### do it
        self.file = None
1081

Jeremy Hylton's avatar
Jeremy Hylton committed
1082 1083 1084 1085 1086 1087
if hasattr(socket, 'ssl'):
    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 did not have an HTTPS class, but it defined an
        interface for sending http requests that is also useful for
        https.
        """

        _connection_class = HTTPSConnection

        def __init__(self, host='', port=None, key_file=None, cert_file=None,
                     strict=None):
            """Provide a default host and pass on the X509 cert info.

            key_file and cert_file are given to the connection class and
            also stored on this instance.
            """
            # urf. compensate for bad input.
            if port == 0:
                port = None
            self._setup(self._connection_class(host, port, key_file,
                                               cert_file, strict))

            # we never actually use these for anything, but we keep them
            # here for compatibility with post-1.5.2 CVS.
            self.key_file = key_file
            self.cert_file = cert_file
Greg Stein's avatar
Greg Stein committed
1107

1108 1109

class HTTPException(Exception):
    """Base class for the exceptions defined in this module."""
    # Subclasses that define an __init__ must call Exception.__init__
    # or define self.args.  Otherwise, str() will fail.


class NotConnected(HTTPException):
    pass


class InvalidURL(HTTPException):
    pass


class UnknownProtocol(HTTPException):
    """Carries the offending protocol version in ``version``."""

    def __init__(self, version):
        self.args = (version,)
        self.version = version


class UnknownTransferEncoding(HTTPException):
    pass


class UnimplementedFileMode(HTTPException):
    """Raised by FakeSocket.makefile() for modes other than 'r'/'rb'."""


class IncompleteRead(HTTPException):
    """Carries the data passed to the constructor in ``partial``."""

    def __init__(self, partial):
        self.args = (partial,)
        self.partial = partial


class ImproperConnectionState(HTTPException):
    """Base class for the connection-state errors defined below."""


class CannotSendRequest(ImproperConnectionState):
    pass


class CannotSendHeader(ImproperConnectionState):
    pass


class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when no response can be fetched yet."""


class BadStatusLine(HTTPException):
    """Carries the offending status line in ``line``."""

    def __init__(self, line):
        self.args = (line,)
        self.line = line


# for backwards compatibility
error = HTTPException

1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses."""

    # The status-line parsing code calls readline(), which normally
    # gets the HTTP status line.  For a 0.9 response, however, this is
    # actually the first line of the body!  Clients need to get a
    # readable file object that contains that line.

    def __init__(self, line, file):
        """Serve line first, then continue with the contents of file.

        line -- data already read from file that must be handed out
                again before anything else
        file -- the underlying file object; attributes not defined
                here are looked up on it
        """
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0
        self._line_left = len(line)
        if not line:
            # nothing to replay: delegate to the file right away rather
            # than tripping the "line left" assertions on first use
            self._done()

    def __getattr__(self, attr):
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object.
        self._line_consumed = 1
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def read(self, amt=None):
        """Return up to amt bytes, taking them from the line first.

        amt of None (or negative, following the file convention) means
        the rest of the line plus everything left in the file.
        """
        if amt is not None and amt < 0:
            amt = None
        if self._line_consumed:
            # only reachable through a bound method saved before
            # _done(); do not hand None to the underlying read()
            if amt is None:
                return self._file.read()
            return self._file.read(amt)
        assert self._line_left
        if amt is None or amt > self._line_left:
            head = self._line[self._line_offset:]
            self._done()
            if amt is None:
                return head + self._file.read()
            return head + self._file.read(amt - len(head))
        start = self._line_offset
        stop = start + amt
        piece = self._line[start:stop]
        self._line_offset = stop
        self._line_left -= amt
        if self._line_left == 0:
            self._done()
        return piece

    def readline(self):
        """Return the unread rest of the line, then lines of the file."""
        if self._line_consumed:
            return self._file.readline()
        assert self._line_left
        rest = self._line[self._line_offset:]
        self._done()
        return rest

    def readlines(self, size=None):
        """Return the rest of the line followed by the file's lines.

        size, when given, is passed to the file's readlines() as is.
        """
        if self._line_consumed:
            # see read(): never forward None as the size argument
            if size is None:
                return self._file.readlines()
            return self._file.readlines(size)
        assert self._line_left
        lines = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return lines + self._file.readlines()
        return lines + self._file.readlines(size)
1223

1224
def test():
1225 1226
    """Test this module.

Jeremy Hylton's avatar
Jeremy Hylton committed
1227 1228
    A hodge podge of tests collected here, because they have too many
    external dependencies for the regular test suite.
1229
    """
1230

1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245
    import sys
    import getopt
    opts, args = getopt.getopt(sys.argv[1:], 'd')
    dl = 0
    for o, a in opts:
        if o == '-d': dl = dl + 1
    host = 'www.python.org'
    selector = '/'
    if args[0:]: host = args[0]
    if args[1:]: selector = args[1]
    h = HTTP()
    h.set_debuglevel(dl)
    h.connect(host)
    h.putrequest('GET', selector)
    h.endheaders()
1246 1247 1248
    status, reason, headers = h.getreply()
    print 'status =', status
    print 'reason =', reason
Jeremy Hylton's avatar
Jeremy Hylton committed
1249
    print "read", len(h.getfile().read())
1250 1251
    print
    if headers:
1252
        for header in headers.headers: print header.strip()
1253
    print
1254

1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265
    # minimal test that code to extract host from url works
    class HTTP11(HTTP):
        _http_vsn = 11
        _http_vsn_str = 'HTTP/1.1'

    h = HTTP11('www.python.org')
    h.putrequest('GET', 'http://www.python.org/~jeremy/')
    h.endheaders()
    h.getreply()
    h.close()

1266
    if hasattr(socket, 'ssl'):
Tim Peters's avatar
Tim Peters committed
1267

Jeremy Hylton's avatar
Jeremy Hylton committed
1268 1269 1270 1271
        for host, selector in (('sourceforge.net', '/projects/python'),
                               ):
            print "https://%s%s" % (host, selector)
            hs = HTTPS()
1272
            hs.set_debuglevel(dl)
Jeremy Hylton's avatar
Jeremy Hylton committed
1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284
            hs.connect(host)
            hs.putrequest('GET', selector)
            hs.endheaders()
            status, reason, headers = hs.getreply()
            print 'status =', status
            print 'reason =', reason
            print "read", len(hs.getfile().read())
            print
            if headers:
                for header in headers.headers: print header.strip()
            print

1285
# run the ad-hoc network tests when executed as a script
if __name__ == '__main__':
    test()