Commit 6f2bb989, authored by Ezio Melotti

#23144: Make sure that HTMLParser.feed() returns all the data, even when convert_charrefs is True.

parent 527ef079
...@@ -198,7 +198,15 @@ class HTMLParser(_markupbase.ParserBase): ...@@ -198,7 +198,15 @@ class HTMLParser(_markupbase.ParserBase):
if self.convert_charrefs and not self.cdata_elem: if self.convert_charrefs and not self.cdata_elem:
j = rawdata.find('<', i) j = rawdata.find('<', i)
if j < 0: if j < 0:
if not end: # if we can't find the next <, either we are at the end
# or there's more text incoming. If the latter is True,
# we can't pass the text to handle_data in case we have
# a charref cut in half at end. Try to determine if
# this is the case before proceeding by looking for an
# & near the end and see if it's followed by a space or ;.
amppos = rawdata.rfind('&', max(i, n-34))
if (amppos >= 0 and
not re.compile(r'[\s;]').search(rawdata, amppos)):
break # wait till we get all the text break # wait till we get all the text
j = n j = n
else: else:
......
...@@ -72,9 +72,6 @@ class EventCollectorExtra(EventCollector): ...@@ -72,9 +72,6 @@ class EventCollectorExtra(EventCollector):
class EventCollectorCharrefs(EventCollector): class EventCollectorCharrefs(EventCollector):
def get_events(self):
return self.events
def handle_charref(self, data): def handle_charref(self, data):
self.fail('This should never be called with convert_charrefs=True') self.fail('This should never be called with convert_charrefs=True')
...@@ -685,6 +682,18 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase): ...@@ -685,6 +682,18 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
] ]
self._run_check(html, expected) self._run_check(html, expected)
def test_convert_charrefs_dropped_text(self):
# #23144: make sure that all the events are triggered when
# convert_charrefs is True, even if we don't call .close()
parser = EventCollector(convert_charrefs=True)
# before the fix, bar & baz was missing
parser.feed("foo <a>link</a> bar &amp; baz")
self.assertEqual(
parser.get_events(),
[('data', 'foo '), ('starttag', 'a', []), ('data', 'link'),
('endtag', 'a'), ('data', ' bar & baz')]
)
class AttributesStrictTestCase(TestCaseBase): class AttributesStrictTestCase(TestCaseBase):
......
+++++++++++ +++++++++++
Python News Python News
+++++++++++ +++++++++++
...@@ -81,6 +81,9 @@ Core and Builtins ...@@ -81,6 +81,9 @@ Core and Builtins
Library Library
------- -------
- Issue #23144: Make sure that HTMLParser.feed() returns all the data, even
when convert_charrefs is True.
- Issue #16180: Exit pdb if file has syntax error, instead of trapping user - Issue #16180: Exit pdb if file has syntax error, instead of trapping user
in an infinite loop. Patch by Xavier de Gaye. in an infinite loop. Patch by Xavier de Gaye.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment