Commit 75b2c5f8, authored by Martin Panter

Issue #24363: Continue parsing HTTP header in spite of invalid lines

Parent commit: e8bba99a
...@@ -273,9 +273,8 @@ class HTTPMessage(mimetools.Message): ...@@ -273,9 +273,8 @@ class HTTPMessage(mimetools.Message):
Read header lines up to the entirely blank line that terminates them. Read header lines up to the entirely blank line that terminates them.
The (normally blank) line that ends the headers is skipped, but not The (normally blank) line that ends the headers is skipped, but not
included in the returned list. If a non-header line ends the headers, included in the returned list. If an invalid line is found in the
(which is an error), an attempt is made to backspace over it; it is header section, it is skipped, and further lines are processed.
never included in the returned list.
The variable self.status is set to the empty string if all went well, The variable self.status is set to the empty string if all went well,
otherwise it is an error message. The variable self.headers is a otherwise it is an error message. The variable self.headers is a
...@@ -302,19 +301,17 @@ class HTTPMessage(mimetools.Message): ...@@ -302,19 +301,17 @@ class HTTPMessage(mimetools.Message):
self.status = '' self.status = ''
headerseen = "" headerseen = ""
firstline = 1 firstline = 1
startofline = unread = tell = None tell = None
if hasattr(self.fp, 'unread'): if not hasattr(self.fp, 'unread') and self.seekable:
unread = self.fp.unread
elif self.seekable:
tell = self.fp.tell tell = self.fp.tell
while True: while True:
if len(hlist) > _MAXHEADERS: if len(hlist) > _MAXHEADERS:
raise HTTPException("got more than %d headers" % _MAXHEADERS) raise HTTPException("got more than %d headers" % _MAXHEADERS)
if tell: if tell:
try: try:
startofline = tell() tell()
except IOError: except IOError:
startofline = tell = None tell = None
self.seekable = 0 self.seekable = 0
line = self.fp.readline(_MAXLINE + 1) line = self.fp.readline(_MAXLINE + 1)
if len(line) > _MAXLINE: if len(line) > _MAXLINE:
...@@ -345,26 +342,14 @@ class HTTPMessage(mimetools.Message): ...@@ -345,26 +342,14 @@ class HTTPMessage(mimetools.Message):
# It's a legal header line, save it. # It's a legal header line, save it.
hlist.append(line) hlist.append(line)
self.addheader(headerseen, line[len(headerseen)+1:].strip()) self.addheader(headerseen, line[len(headerseen)+1:].strip())
continue
elif headerseen is not None: elif headerseen is not None:
# An empty header name. These aren't allowed in HTTP, but it's # An empty header name. These aren't allowed in HTTP, but it's
# probably a benign mistake. Don't add the header, just keep # probably a benign mistake. Don't add the header, just keep
# going. # going.
continue pass
else: else:
# It's not a header line; throw it back and stop here. # It's not a header line; skip it and try the next line.
if not self.dict: self.status = 'Non-header line where header expected'
self.status = 'No headers'
else:
self.status = 'Non-header line where header expected'
# Try to undo the read.
if unread:
unread(line)
elif tell:
self.fp.seek(startofline)
else:
self.status = self.status + '; bad seek'
break
class HTTPResponse: class HTTPResponse:
......
...@@ -241,6 +241,120 @@ class HeaderTests(TestCase): ...@@ -241,6 +241,120 @@ class HeaderTests(TestCase):
self.assertEqual(resp.getheader('First'), 'val') self.assertEqual(resp.getheader('First'), 'val')
self.assertEqual(resp.getheader('Second'), 'val') self.assertEqual(resp.getheader('Second'), 'val')
def test_malformed_truncation(self):
    # Regression check: a malformed line in the header section (here,
    # lines lacking a colon) must not cut off the fields that follow it.
    raw = b''.join((
        b'HTTP/1.1 200 OK\r\n',
        b'Public-Key-Pins: \n',
        b'pin-sha256="xxx=";\n',
        b'report-uri="https://..."\r\n',
        b'Transfer-Encoding: chunked\r\n',
        b'\r\n',
        b'4\r\nbody\r\n0\r\n\r\n',
    ))
    response = httplib.HTTPResponse(FakeSocket(raw))
    response.begin()
    # The field before the bad lines is kept ...
    self.assertIsNotNone(response.getheader('Public-Key-Pins'))
    # ... and the framing field after them is still seen, so the
    # chunked body decodes correctly.
    self.assertEqual(response.getheader('Transfer-Encoding'), 'chunked')
    self.assertEqual(response.read(), b'body')
def test_blank_line_forms(self):
    # Either a CRLF or a bare LF blank line ends the header section and
    # marks the start of the body.
    for terminator in (b'\r\n', b'\n'):
        chunked_raw = (
            b'HTTP/1.1 200 OK\r\n'
            b'Transfer-Encoding: chunked\r\n'
            + terminator
            + b'4\r\nbody\r\n0\r\n\r\n'
        )
        response = httplib.HTTPResponse(FakeSocket(chunked_raw))
        response.begin()
        self.assertEqual(response.getheader('Transfer-Encoding'), 'chunked')
        self.assertEqual(response.read(), b'body')

        # Same terminator, but with no header fields at all.
        plain_raw = b'HTTP/1.0 200 OK\r\n' + terminator + b'body'
        response = httplib.HTTPResponse(FakeSocket(plain_raw))
        response.begin()
        self.assertEqual(response.read(), b'body')

    # A lone CR does not count as the blank line that ends the header
    # section, so the fields after it should still be parsed when
    # possible.
    cr_raw = b''.join((
        b'HTTP/1.1 200 OK\r\n',
        b'\r',
        b'Name: value\r\n',
        b'Transfer-Encoding: chunked\r\n',
        b'\r\n',
        b'4\r\nbody\r\n0\r\n\r\n',
    ))
    response = httplib.HTTPResponse(FakeSocket(cr_raw))
    response.begin()
    self.assertEqual(response.getheader('Transfer-Encoding'), 'chunked')
    self.assertEqual(response.read(), b'body')

    # Status line only: neither header fields nor a terminating blank
    # line are present.
    bare_raw = b'HTTP/1.0 200 OK\r\n'
    response = httplib.HTTPResponse(FakeSocket(bare_raw))
    response.begin()
    self.assertEqual(response.read(), b'')
def test_from_line(self):
    # "From" lines get special treatment in the parser; make sure they
    # do not disturb parsing of the surrounding header fields, wherever
    # in the header section they appear.
    mixed_raw = b''.join((
        b'HTTP/1.1 200 OK\r\n',
        b'From start\r\n',
        b' continued\r\n',
        b'Name: value\r\n',
        b'From middle\r\n',
        b' continued\r\n',
        b'Transfer-Encoding: chunked\r\n',
        b'From end\r\n',
        b'\r\n',
        b'4\r\nbody\r\n0\r\n\r\n',
    ))
    response = httplib.HTTPResponse(FakeSocket(mixed_raw))
    response.begin()
    self.assertIsNotNone(response.getheader('Name'))
    self.assertEqual(response.getheader('Transfer-Encoding'), 'chunked')
    self.assertEqual(response.read(), b'body')

    # A "From" line as the only content of the header section.
    lone_raw = b''.join((
        b'HTTP/1.0 200 OK\r\n',
        b'From alone\r\n',
        b'\r\n',
        b'body',
    ))
    response = httplib.HTTPResponse(FakeSocket(lone_raw))
    response.begin()
    self.assertEqual(response.read(), b'body')
def test_parse_all_octets(self):
    # Every octet that is valid inside a header field must pass through
    # the parser without breaking it.
    visible_octets = bytearray(range(0x21, 0x7E + 1))
    obs_text_octets = bytearray(range(0x80, 0xFF + 1))
    raw = (
        b'HTTP/1.1 200 OK\r\n'
        b"!#$%&'*+-.^_`|~: value\r\n"  # Special token characters
        b'VCHAR: ' + visible_octets + b'\r\n'
        b'obs-text: ' + obs_text_octets + b'\r\n'
        b'obs-fold: text\r\n'
        b' folded with space\r\n'
        b'\tfolded with tab\r\n'
        b'Content-Length: 0\r\n'
        b'\r\n'
    )
    response = httplib.HTTPResponse(FakeSocket(raw))
    response.begin()
    self.assertEqual(response.getheader('Content-Length'), '0')
    self.assertEqual(response.getheader("!#$%&'*+-.^_`|~"), 'value')
    expected_vchar = ''.join(chr(code) for code in range(0x21, 0x7E + 1))
    self.assertEqual(response.getheader('VCHAR'), expected_vchar)
    self.assertIsNotNone(response.getheader('obs-text'))
    # The folded field keeps its first line and both continuation lines.
    folded_value = response.getheader('obs-fold')
    self.assertTrue(folded_value.startswith('text'))
    self.assertIn(' folded with space', folded_value)
    self.assertTrue(folded_value.endswith('folded with tab'))
def test_invalid_headers(self): def test_invalid_headers(self):
conn = httplib.HTTPConnection('example.com') conn = httplib.HTTPConnection('example.com')
conn.sock = FakeSocket('') conn.sock = FakeSocket('')
...@@ -525,7 +639,7 @@ class BasicTest(TestCase): ...@@ -525,7 +639,7 @@ class BasicTest(TestCase):
self.assertTrue(hasattr(resp,'fileno'), self.assertTrue(hasattr(resp,'fileno'),
'HTTPResponse should expose a fileno attribute') 'HTTPResponse should expose a fileno attribute')
# Test lines overflowing the max line size (_MAXLINE in http.client) # Test lines overflowing the max line size (_MAXLINE in httplib)
def test_overflowing_status_line(self): def test_overflowing_status_line(self):
self.skipTest("disabled for HTTP 0.9 support") self.skipTest("disabled for HTTP 0.9 support")
......
...@@ -42,6 +42,11 @@ Core and Builtins ...@@ -42,6 +42,11 @@ Core and Builtins
Library Library
------- -------
- Issue #24363: When parsing HTTP header fields, if an invalid line is
encountered, skip it and continue parsing. Previously, no more header
fields were parsed, which could lead to fields for HTTP framing like
Content-Length and Transfer-Encoding being overlooked.
- Issue #27599: Fixed buffer overrun in binascii.b2a_qp() and binascii.a2b_qp(). - Issue #27599: Fixed buffer overrun in binascii.b2a_qp() and binascii.a2b_qp().
- Issue #25969: Update the lib2to3 grammar to handle the unpacking - Issue #25969: Update the lib2to3 grammar to handle the unpacking
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment