Commit 30c48491, authored by Fred Drake

Added several new tests to check the behavior with respect to doctype
declarations and weird markup. We used to accept and ignore such markup,
but recent versions raised an exception for it; the original behavior has
been restored and augmented (users can decide what to do if they care; the
default is to ignore it, as was done in early versions).
parent e822049e
...@@ -54,6 +54,9 @@ class EventCollector(sgmllib.SGMLParser): ...@@ -54,6 +54,9 @@ class EventCollector(sgmllib.SGMLParser):
def handle_pi(self, data): def handle_pi(self, data):
self.append(("pi", data)) self.append(("pi", data))
def unknown_decl(self, decl):
    """Record a declaration handed to this handler as an event.

    Appends the tuple ``("unknown decl", decl)`` through ``self.append``
    and returns nothing.

    NOTE(review): presumably invoked by the SGML parser for ``<!...>``
    markup it does not recognise -- confirm against sgmllib.
    """
    self.append(("unknown decl", decl))
class CDATAEventCollector(EventCollector): class CDATAEventCollector(EventCollector):
def start_cdata(self, attrs): def start_cdata(self, attrs):
...@@ -65,12 +68,24 @@ class SGMLParserTestCase(unittest.TestCase): ...@@ -65,12 +68,24 @@ class SGMLParserTestCase(unittest.TestCase):
collector = EventCollector collector = EventCollector
def check_events(self, source, expected_events): def get_events(self, source):
parser = self.collector() parser = self.collector()
for s in source: try:
parser.feed(s) for s in source:
parser.close() parser.feed(s)
events = parser.get_events() parser.close()
except:
#self.events = parser.events
raise
return parser.get_events()
def check_events(self, source, expected_events):
try:
events = self.get_events(source)
except:
import sys
#print >>sys.stderr, pprint.pformat(self.events)
raise
if events != expected_events: if events != expected_events:
self.fail("received events did not match expected events\n" self.fail("received events did not match expected events\n"
"Expected:\n" + pprint.pformat(expected_events) + "Expected:\n" + pprint.pformat(expected_events) +
...@@ -87,6 +102,31 @@ class SGMLParserTestCase(unittest.TestCase): ...@@ -87,6 +102,31 @@ class SGMLParserTestCase(unittest.TestCase):
self.fail("expected SGMLParseError for %r\nReceived:\n%s" self.fail("expected SGMLParseError for %r\nReceived:\n%s"
% (source, pprint.pformat(parser.get_events()))) % (source, pprint.pformat(parser.get_events())))
def test_doctype_decl_internal(self):
    """Check a DOCTYPE declaration that carries an internal subset.

    The complete declaration is fed as a single chunk, and the expected
    result is exactly one ``("decl", ...)`` event holding the text
    between ``<!`` and ``>``.
    """
    # The backslash right after the opening quotes keeps the literal from
    # starting with a newline.
    inside = """\
DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'
SYSTEM 'http://www.w3.org/TR/html401/strict.dtd' [
<!ELEMENT html - O EMPTY>
<!ATTLIST html
version CDATA #IMPLIED
profile CDATA 'DublinCore'>
<!NOTATION datatype SYSTEM 'http://xml.python.org/notations/python-module'>
<!ENTITY myEntity 'internal parsed entity'>
<!ENTITY anEntity SYSTEM 'http://xml.python.org/entities/something.xml'>
<!ENTITY % paramEntity 'name|name|name'>
%paramEntity;
<!-- comment -->
]"""
    self.check_events(["<!%s>" % inside], [
        ("decl", inside),
    ])
def test_doctype_decl_external(self):
    """Check a DOCTYPE declaration that has no internal subset.

    The expected result is one ``("decl", ...)`` event holding the text
    between ``<!`` and ``>``.  The markup is passed to ``check_events``
    as a plain string rather than as a list of chunks.
    """
    inside = "DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'"
    self.check_events("<!%s>" % inside, [
        ("decl", inside),
    ])
def test_underscore_in_attrname(self): def test_underscore_in_attrname(self):
# SF bug #436621 # SF bug #436621
"""Make sure attribute names with underscores are accepted""" """Make sure attribute names with underscores are accepted"""
...@@ -132,6 +172,16 @@ class SGMLParserTestCase(unittest.TestCase): ...@@ -132,6 +172,16 @@ class SGMLParserTestCase(unittest.TestCase):
("endtag", "b"), ("endtag", "b"),
]) ])
def test_bare_ampersands(self):
    """Check that stray ``&`` characters are reported as ordinary data.

    The expected result is one ``("data", ...)`` event whose text equals
    the input unchanged.
    """
    # Bound once so the input and the expected payload cannot drift apart.
    text = "this text & contains & ampersands &"
    self.check_events(text, [
        ("data", text),
    ])
def test_bare_pointy_brackets(self):
    """Check that stray ``<`` and ``>`` characters are reported as data.

    The expected result is one ``("data", ...)`` event whose text equals
    the input unchanged.
    """
    # Bound once so the input and the expected payload cannot drift apart.
    text = "this < text > contains < bare>pointy< brackets"
    self.check_events(text, [
        ("data", text),
    ])
def test_attr_syntax(self): def test_attr_syntax(self):
output = [ output = [
("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", "e")]) ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", "e")])
...@@ -156,6 +206,14 @@ class SGMLParserTestCase(unittest.TestCase): ...@@ -156,6 +206,14 @@ class SGMLParserTestCase(unittest.TestCase):
("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]), ("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]),
]) ])
def test_illegal_declarations(self):
    """Check that an unrecognised ``<!...>`` declaration is not an error.

    The expected event sequence is the data before the declaration, an
    ``("unknown decl", ...)`` event carrying the text between ``<!`` and
    ``>``, and then the data after it.

    NOTE(review): a method with this same name and an identical body
    appears again further down; if both are in the same class, the later
    definition replaces this one -- consider keeping only one.
    """
    s = 'abc<!spacer type="block" height="25">def'
    self.check_events(s, [
        ("data", "abc"),
        ("unknown decl", 'spacer type="block" height="25"'),
        ("data", "def"),
    ])
def test_weird_starttags(self): def test_weird_starttags(self):
self.check_events("<a<a>", [ self.check_events("<a<a>", [
("starttag", "a", []), ("starttag", "a", []),
...@@ -196,6 +254,14 @@ class SGMLParserTestCase(unittest.TestCase): ...@@ -196,6 +254,14 @@ class SGMLParserTestCase(unittest.TestCase):
("endtag", "cdata"), ("endtag", "cdata"),
]) ])
def test_illegal_declarations(self):
    """Check that an unrecognised ``<!...>`` declaration is not an error.

    The expected event sequence is the data before the declaration, an
    ``("unknown decl", ...)`` event carrying the text between ``<!`` and
    ``>``, and then the data after it.

    NOTE(review): this repeats, name and body, a method that appears
    earlier; if both are in the same class, this definition rebinds the
    name and the earlier one never runs separately -- consider removing
    one of the two.
    """
    s = 'abc<!spacer type="block" height="25">def'
    self.check_events(s, [
        ("data", "abc"),
        ("unknown decl", 'spacer type="block" height="25"'),
        ("data", "def"),
    ])
# XXX These tests have been disabled by prefixing their names with # XXX These tests have been disabled by prefixing their names with
# an underscore. The first two exercise outstanding bugs in the # an underscore. The first two exercise outstanding bugs in the
# sgmllib module, and the third exhibits questionable behavior # sgmllib module, and the third exhibits questionable behavior
...@@ -240,4 +306,9 @@ class SGMLParserTestCase(unittest.TestCase): ...@@ -240,4 +306,9 @@ class SGMLParserTestCase(unittest.TestCase):
self.check_parse_error("<a foo=>") self.check_parse_error("<a foo=>")
test_support.run_unittest(SGMLParserTestCase) def test_main():
test_support.run_unittest(SGMLParserTestCase)
if __name__ == "__main__":
test_main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment