Issue #24363: Continue parsing HTTP header in spite of invalid lines

75b2c5f8 · Martin Panter · e8bba99a · 75b2c5f8 · 75b2c5f8 · 75b2c5f8
Kaydet (Commit) 75b2c5f8 authored Eyl 16, 2016 tarafından Martin Panter
Hide whitespace changes
Inline Side-by-side

Showing with 129 additions and 25 deletions

httplib.py Lib/httplib.py +9 -24

test_httplib.py Lib/test/test_httplib.py +115 -1

NEWS Misc/NEWS +5 -0

No files found.
--- a/Lib/httplib.py
+++ b/Lib/httplib.py
@@ -273,9 +273,8 @@ class HTTPMessage(mimetools.Message):
        Read header lines up to the entirely blank line that terminates them.
        The (normally blank) line that ends the headers is skipped, but not
-        included in the returned list.  If a non-header line ends the headers,
+        included in the returned list.  If an invalid line is found in the
-        (which is an error), an attempt is made to backspace over it; it is
+        header section, it is skipped, and further lines are processed.
-        never included in the returned list.
        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message.  The variable self.headers is a
@@ -302,19 +301,17 @@ class HTTPMessage(mimetools.Message):
        self.status = ''
        headerseen = ""
        firstline = 1
-        startofline = unread = tell = None
+        tell = None
-        if hasattr(self.fp, 'unread'):
+        if not hasattr(self.fp, 'unread') and self.seekable:
-            unread = self.fp.unread
-        elif self.seekable:
            tell = self.fp.tell
        while True:
            if len(hlist) > _MAXHEADERS:
                raise HTTPException("got more than %d headers" % _MAXHEADERS)
            if tell:
                try:
-                    startofline = tell()
+                    tell()
                except IOError:
-                    startofline = tell = None
+                    tell = None
                    self.seekable = 0
            line = self.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
@@ -345,26 +342,14 @@ class HTTPMessage(mimetools.Message):
                # It's a legal header line, save it.
                hlist.append(line)
                self.addheader(headerseen, line[len(headerseen)+1:].strip())
-                continue
            elif headerseen is not None:
                # An empty header name. These aren't allowed in HTTP, but it's
                # probably a benign mistake. Don't add the header, just keep
                # going.
-                continue
+                pass
            else:
-                # It's not a header line; throw it back and stop here.
+                # It's not a header line; skip it and try the next line.
-                if not self.dict:
+                self.status = 'Non-header line where header expected'
-                    self.status = 'No headers'
-                else:
-                    self.status = 'Non-header line where header expected'
-                # Try to undo the read.
-                if unread:
-                    unread(line)
-                elif tell:
-                    self.fp.seek(startofline)
-                else:
-                    self.status = self.status + '; bad seek'
-                break
 class HTTPResponse:

--- a/Lib/test/test_httplib.py
+++ b/Lib/test/test_httplib.py
@@ -241,6 +241,120 @@ class HeaderTests(TestCase):
        self.assertEqual(resp.getheader('First'), 'val')
        self.assertEqual(resp.getheader('Second'), 'val')
+    def test_malformed_truncation(self):
+        # Other malformed header lines, especially without colons, used to
+        # cause the rest of the header section to be truncated
+        resp = (
+            b'HTTP/1.1 200 OK\r\n'
+            b'Public-Key-Pins: \n'
+            b'pin-sha256="xxx=";\n'
+            b'report-uri="https://..."\r\n'
+            b'Transfer-Encoding: chunked\r\n'
+            b'\r\n'
+            b'4\r\nbody\r\n0\r\n\r\n'
+        )
+        resp = httplib.HTTPResponse(FakeSocket(resp))
+        resp.begin()
+        self.assertIsNotNone(resp.getheader('Public-Key-Pins'))
+        self.assertEqual(resp.getheader('Transfer-Encoding'), 'chunked')
+        self.assertEqual(resp.read(), b'body')
+    def test_blank_line_forms(self):
+        # Test that both CRLF and LF blank lines can terminate the header
+        # section and start the body
+        for blank in (b'\r\n', b'\n'):
+            resp = b'HTTP/1.1 200 OK\r\n' b'Transfer-Encoding: chunked\r\n'
+            resp += blank
+            resp += b'4\r\nbody\r\n0\r\n\r\n'
+            resp = httplib.HTTPResponse(FakeSocket(resp))
+            resp.begin()
+            self.assertEqual(resp.getheader('Transfer-Encoding'), 'chunked')
+            self.assertEqual(resp.read(), b'body')
+            resp = b'HTTP/1.0 200 OK\r\n' + blank + b'body'
+            resp = httplib.HTTPResponse(FakeSocket(resp))
+            resp.begin()
+            self.assertEqual(resp.read(), b'body')
+        # A blank line ending in CR is not treated as the end of the HTTP
+        # header section, therefore header fields following it should be
+        # parsed if possible
+        resp = (
+            b'HTTP/1.1 200 OK\r\n'
+            b'\r'
+            b'Name: value\r\n'
+            b'Transfer-Encoding: chunked\r\n'
+            b'\r\n'
+            b'4\r\nbody\r\n0\r\n\r\n'
+        )
+        resp = httplib.HTTPResponse(FakeSocket(resp))
+        resp.begin()
+        self.assertEqual(resp.getheader('Transfer-Encoding'), 'chunked')
+        self.assertEqual(resp.read(), b'body')
+        # No header fields nor blank line
+        resp = b'HTTP/1.0 200 OK\r\n'
+        resp = httplib.HTTPResponse(FakeSocket(resp))
+        resp.begin()
+        self.assertEqual(resp.read(), b'')
+    def test_from_line(self):
+        # The parser handles "From" lines specially, so test this does not
+        # affect parsing the rest of the header section
+        resp = (
+            b'HTTP/1.1 200 OK\r\n'
+            b'From start\r\n'
+            b' continued\r\n'
+            b'Name: value\r\n'
+            b'From middle\r\n'
+            b' continued\r\n'
+            b'Transfer-Encoding: chunked\r\n'
+            b'From end\r\n'
+            b'\r\n'
+            b'4\r\nbody\r\n0\r\n\r\n'
+        )
+        resp = httplib.HTTPResponse(FakeSocket(resp))
+        resp.begin()
+        self.assertIsNotNone(resp.getheader('Name'))
+        self.assertEqual(resp.getheader('Transfer-Encoding'), 'chunked')
+        self.assertEqual(resp.read(), b'body')
+        resp = (
+            b'HTTP/1.0 200 OK\r\n'
+            b'From alone\r\n'
+            b'\r\n'
+            b'body'
+        )
+        resp = httplib.HTTPResponse(FakeSocket(resp))
+        resp.begin()
+        self.assertEqual(resp.read(), b'body')
+    def test_parse_all_octets(self):
+        # Ensure no valid header field octet breaks the parser
+        body = (
+            b'HTTP/1.1 200 OK\r\n'
+            b"!#$%&'*+-.^_`|~: value\r\n"  # Special token characters
+            b'VCHAR: ' + bytearray(range(0x21, 0x7E + 1)) + b'\r\n'
+            b'obs-text: ' + bytearray(range(0x80, 0xFF + 1)) + b'\r\n'
+            b'obs-fold: text\r\n'
+            b' folded with space\r\n'
+            b'\tfolded with tab\r\n'
+            b'Content-Length: 0\r\n'
+            b'\r\n'
+        )
+        sock = FakeSocket(body)
+        resp = httplib.HTTPResponse(sock)
+        resp.begin()
+        self.assertEqual(resp.getheader('Content-Length'), '0')
+        self.assertEqual(resp.getheader("!#$%&'*+-.^_`|~"), 'value')
+        vchar = ''.join(map(chr, range(0x21, 0x7E + 1)))
+        self.assertEqual(resp.getheader('VCHAR'), vchar)
+        self.assertIsNotNone(resp.getheader('obs-text'))
+        folded = resp.getheader('obs-fold')
+        self.assertTrue(folded.startswith('text'))
+        self.assertIn(' folded with space', folded)
+        self.assertTrue(folded.endswith('folded with tab'))
    def test_invalid_headers(self):
        conn = httplib.HTTPConnection('example.com')
        conn.sock = FakeSocket('')
@@ -525,7 +639,7 @@ class BasicTest(TestCase):
        self.assertTrue(hasattr(resp,'fileno'),
                'HTTPResponse should expose a fileno attribute')
-    # Test lines overflowing the max line size (_MAXLINE in http.client)
+    # Test lines overflowing the max line size (_MAXLINE in httplib)
    def test_overflowing_status_line(self):
        self.skipTest("disabled for HTTP 0.9 support")

--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -42,6 +42,11 @@ Core and Builtins
 Library
 -------
+- Issue #24363: When parsing HTTP header fields, if an invalid line is
+  encountered, skip it and continue parsing.  Previously, no more header
+  fields were parsed, which could lead to fields for HTTP framing like
+  Content-Length and Transfer-Encoding being overlooked.
 - Issue #27599: Fixed buffer overrun in binascii.b2a_qp() and binascii.a2b_qp().
 - Issue #25969: Update the lib2to3 grammar to handle the unpacking