Commit 65d36dab, authored by Ezio Melotti

#13987: HTMLParser is now able to handle malformed start tags.

parent d2307cb4
...@@ -315,8 +315,8 @@ class HTMLParser(markupbase.ParserBase): ...@@ -315,8 +315,8 @@ class HTMLParser(markupbase.ParserBase):
- self.__starttag_text.rfind("\n") - self.__starttag_text.rfind("\n")
else: else:
offset = offset + len(self.__starttag_text) offset = offset + len(self.__starttag_text)
self.error("junk characters in start tag: %r" self.handle_data(rawdata[i:endpos])
% (rawdata[k:endpos][:20],)) return endpos
if end.endswith('/>'): if end.endswith('/>'):
# XHTML-style empty tag: <span attr="value" /> # XHTML-style empty tag: <span attr="value" />
self.handle_startendtag(tag, attrs) self.handle_startendtag(tag, attrs)
...@@ -353,8 +353,10 @@ class HTMLParser(markupbase.ParserBase): ...@@ -353,8 +353,10 @@ class HTMLParser(markupbase.ParserBase):
# end of input in or before attribute value, or we have the # end of input in or before attribute value, or we have the
# '/' from a '/>' ending # '/' from a '/>' ending
return -1 return -1
self.updatepos(i, j) if j > i:
self.error("malformed start tag") return j
else:
return i + 1
raise AssertionError("we should not get here!") raise AssertionError("we should not get here!")
# Internal -- parse endtag, return end or -1 if incomplete # Internal -- parse endtag, return end or -1 if incomplete
......
...@@ -206,7 +206,8 @@ text ...@@ -206,7 +206,8 @@ text
self._run_check("</$>", [('comment', '$')]) self._run_check("</$>", [('comment', '$')])
self._run_check("</", [('data', '</')]) self._run_check("</", [('data', '</')])
self._run_check("</a", [('data', '</a')]) self._run_check("</a", [('data', '</a')])
self._parse_error("<a<a>") # XXX this might be wrong
self._run_check("<a<a>", [('data', '<a'), ('starttag', 'a', [])])
self._run_check("</a<a>", [('endtag', 'a<a')]) self._run_check("</a<a>", [('endtag', 'a<a')])
self._run_check("<!", [('data', '<!')]) self._run_check("<!", [('data', '<!')])
self._run_check("<a", [('data', '<a')]) self._run_check("<a", [('data', '<a')])
......
...@@ -94,7 +94,7 @@ Library ...@@ -94,7 +94,7 @@ Library
------- -------
- Issue #13987: HTMLParser is now able to handle EOFs in the middle of a - Issue #13987: HTMLParser is now able to handle EOFs in the middle of a
construct. construct and malformed start tags.
- Issue #13015: Fix a possible reference leak in defaultdict.__repr__. - Issue #13015: Fix a possible reference leak in defaultdict.__repr__.
Patch by Suman Saha. Patch by Suman Saha.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment