Commit 88ebfb12, authored by Ezio Melotti

#15114: The html.parser module now raises a DeprecationWarning when the strict…

#15114: The html.parser module now raises a DeprecationWarning when the strict argument of HTMLParser or the HTMLParser.error method is used.
parent 28f0beaf
...@@ -74,7 +74,7 @@ as they are encountered:: ...@@ -74,7 +74,7 @@ as they are encountered::
def handle_data(self, data): def handle_data(self, data):
print("Encountered some data :", data) print("Encountered some data :", data)
parser = MyHTMLParser(strict=False) parser = MyHTMLParser()
parser.feed('<html><head><title>Test</title></head>' parser.feed('<html><head><title>Test</title></head>'
'<body><h1>Parse me!</h1></body></html>') '<body><h1>Parse me!</h1></body></html>')
...@@ -272,7 +272,7 @@ examples:: ...@@ -272,7 +272,7 @@ examples::
def handle_decl(self, data): def handle_decl(self, data):
print("Decl :", data) print("Decl :", data)
parser = MyHTMLParser(strict=False) parser = MyHTMLParser()
Parsing a doctype:: Parsing a doctype::
......
...@@ -94,6 +94,8 @@ class HTMLParseError(Exception): ...@@ -94,6 +94,8 @@ class HTMLParseError(Exception):
return result return result
_strict_sentinel = object()
class HTMLParser(_markupbase.ParserBase): class HTMLParser(_markupbase.ParserBase):
"""Find tags and other markup and call handler functions. """Find tags and other markup and call handler functions.
...@@ -116,16 +118,18 @@ class HTMLParser(_markupbase.ParserBase): ...@@ -116,16 +118,18 @@ class HTMLParser(_markupbase.ParserBase):
CDATA_CONTENT_ELEMENTS = ("script", "style") CDATA_CONTENT_ELEMENTS = ("script", "style")
def __init__(self, strict=False): def __init__(self, strict=_strict_sentinel):
"""Initialize and reset this instance. """Initialize and reset this instance.
If strict is set to False (the default) the parser will parse invalid If strict is set to False (the default) the parser will parse invalid
markup, otherwise it will raise an error. Note that the strict mode markup, otherwise it will raise an error. Note that the strict mode
is deprecated. and argument are deprecated.
""" """
if strict: if strict is not _strict_sentinel:
warnings.warn("The strict mode is deprecated.", warnings.warn("The strict argument and mode are deprecated.",
DeprecationWarning, stacklevel=2) DeprecationWarning, stacklevel=2)
else:
strict = False # default
self.strict = strict self.strict = strict
self.reset() self.reset()
...@@ -151,6 +155,8 @@ class HTMLParser(_markupbase.ParserBase): ...@@ -151,6 +155,8 @@ class HTMLParser(_markupbase.ParserBase):
self.goahead(1) self.goahead(1)
def error(self, message): def error(self, message):
warnings.warn("The 'error' method is deprecated.",
DeprecationWarning, stacklevel=2)
raise HTMLParseError(message, self.getpos()) raise HTMLParseError(message, self.getpos())
__starttag_text = None __starttag_text = None
......
...@@ -96,7 +96,9 @@ class TestCaseBase(unittest.TestCase): ...@@ -96,7 +96,9 @@ class TestCaseBase(unittest.TestCase):
parser = self.get_collector() parser = self.get_collector()
parser.feed(source) parser.feed(source)
parser.close() parser.close()
self.assertRaises(html.parser.HTMLParseError, parse) with self.assertRaises(html.parser.HTMLParseError):
with self.assertWarns(DeprecationWarning):
parse()
class HTMLParserStrictTestCase(TestCaseBase): class HTMLParserStrictTestCase(TestCaseBase):
...@@ -360,7 +362,16 @@ text ...@@ -360,7 +362,16 @@ text
class HTMLParserTolerantTestCase(HTMLParserStrictTestCase): class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
def get_collector(self): def get_collector(self):
return EventCollector(strict=False) return EventCollector()
def test_deprecation_warnings(self):
with self.assertWarns(DeprecationWarning):
EventCollector(strict=True)
with self.assertWarns(DeprecationWarning):
EventCollector(strict=False)
with self.assertRaises(html.parser.HTMLParseError):
with self.assertWarns(DeprecationWarning):
EventCollector().error('test')
def test_tolerant_parsing(self): def test_tolerant_parsing(self):
self._run_check('<html <html>te>>xt&a<<bc</a></html>\n' self._run_check('<html <html>te>>xt&a<<bc</a></html>\n'
...@@ -676,7 +687,7 @@ class AttributesStrictTestCase(TestCaseBase): ...@@ -676,7 +687,7 @@ class AttributesStrictTestCase(TestCaseBase):
class AttributesTolerantTestCase(AttributesStrictTestCase): class AttributesTolerantTestCase(AttributesStrictTestCase):
def get_collector(self): def get_collector(self):
return EventCollector(strict=False) return EventCollector()
def test_attr_funky_names2(self): def test_attr_funky_names2(self):
self._run_check( self._run_check(
......
...@@ -31,6 +31,9 @@ Core and Builtins ...@@ -31,6 +31,9 @@ Core and Builtins
Library Library
------- -------
- Issue #15114: The html.parser module now raises a DeprecationWarning when the
strict argument of HTMLParser or the HTMLParser.error method is used.
- Issue #19410: Undo the special-casing removal of '' for - Issue #19410: Undo the special-casing removal of '' for
importlib.machinery.FileFinder. importlib.machinery.FileFinder.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment