Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
19ff4ac7
Kaydet (Commit)
19ff4ac7
authored
Tem 16, 2001
tarafından
Fred Drake
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
Add a unit test for sgmllib (needs work, but has already caught problems).
Based on the test for the HTMLParser module.
üst
14f6c18b
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
243 additions
and
0 deletions
+243
-0
test_sgmllib.py
Lib/test/test_sgmllib.py
+243
-0
No files found.
Lib/test/test_sgmllib.py
0 → 100644
Dosyayı görüntüle @
19ff4ac7
import
pprint
import
sgmllib
import
test_support
import
unittest
class EventCollector(sgmllib.SGMLParser):
    """SGML parser that records each handler callback as an event tuple.

    Every handler overridden here appends a tuple to ``self.events``.  The
    first item of the tuple names the kind of event ("starttag", "endtag",
    "comment", "charref", "data", "decl", "entityref" or "pi"); the
    remaining items are the arguments the handler received.
    """

    def __init__(self):
        self.events = []
        # Shortcut used by every handler below.  It is bound to the list
        # object created above, so that object must never be replaced
        # (see get_events()).
        self.append = self.events.append
        sgmllib.SGMLParser.__init__(self)

    def get_events(self):
        """Return the recorded events with adjacent "data" events merged.

        Consecutive ("data", text) events are collapsed into one event
        whose text is the concatenation of the individual pieces.  The
        normalized list is stored back into ``self.events`` and returned.
        """
        # Normalize the list of events so that buffer artefacts don't
        # separate runs of contiguous characters.
        merged = []
        for event in self.events:
            if event[0] == "data" and merged and merged[-1][0] == "data":
                merged[-1] = ("data", merged[-1][1] + event[1])
            else:
                merged.append(event)
        # Update the list in place instead of rebinding self.events:
        # self.append is bound to this very list object, so rebinding
        # would send any later events to a list that nobody reads.
        self.events[:] = merged
        return self.events

    # structure markup

    def unknown_starttag(self, tag, attrs):
        """Record a ("starttag", tag, attrs) event."""
        self.append(("starttag", tag, attrs))

    def unknown_endtag(self, tag):
        """Record an ("endtag", tag) event."""
        self.append(("endtag", tag))

    # all other markup

    def handle_comment(self, data):
        """Record a ("comment", data) event."""
        self.append(("comment", data))

    def handle_charref(self, data):
        """Record a ("charref", data) event."""
        self.append(("charref", data))

    def handle_data(self, data):
        """Record a ("data", data) event."""
        self.append(("data", data))

    def handle_decl(self, decl):
        """Record a ("decl", decl) event."""
        self.append(("decl", decl))

    def handle_entityref(self, data):
        """Record an ("entityref", data) event."""
        self.append(("entityref", data))

    def handle_pi(self, data):
        """Record a ("pi", data) event."""
        self.append(("pi", data))
class CDATAEventCollector(EventCollector):
    """Event collector with an explicit start handler for <cdata> tags."""

    def start_cdata(self, attrs):
        """Record the <cdata> start tag, then call ``setliteral()``."""
        start_event = ("starttag", "cdata", attrs)
        self.append(start_event)
        self.setliteral()
class SGMLParserTestCase(unittest.TestCase):
    """Tests that compare the events a collector parser records with an
    expected event list.

    ``collector`` is the parser class instantiated by the check helpers;
    an individual test may replace it before calling them.
    """

    collector = EventCollector

    def check_events(self, source, expected_events):
        """Parse *source* and fail unless the events match exactly.

        *source* is iterated and every item is passed to ``feed()`` on its
        own, so a plain string is fed one character at a time while a list
        of strings is fed chunk by chunk.  On mismatch the failure message
        shows both event lists pretty-printed.
        """
        parser = self.collector()
        for s in source:
            parser.feed(s)
        parser.close()
        events = parser.get_events()
        if events != expected_events:
            self.fail("received events did not match expected events\n"
                      "Expected:\n" + pprint.pformat(expected_events) +
                      "\nReceived:\n" + pprint.pformat(events))

    def check_parse_error(self, source):
        """Fail unless parsing *source* raises sgmllib.SGMLParseError.

        *source* is fed to the parser in a single ``feed()`` call.  If no
        error is raised, the failure message includes the source and the
        events that were recorded instead.
        """
        # Use the configurable collector class, as check_events() does.
        parser = self.collector()
        try:
            parser.feed(source)
            parser.close()
        except sgmllib.SGMLParseError:
            pass
        else:
            self.fail("expected SGMLParseError for %r\nReceived:\n%s"
                      % (source, pprint.pformat(parser.get_events())))

    def test_underscore_in_attrname(self):
        # SF bug #436621
        """Make sure attribute names with underscores are accepted"""
        self.check_events("<a has_under _under>", [
            ("starttag", "a", [("has_under", "has_under"),
                               ("_under", "_under")]),
            ])

    def test_underscore_in_tagname(self):
        # SF bug #436621
        """Make sure tag names with underscores are accepted"""
        self.check_events("<has_under></has_under>", [
            ("starttag", "has_under", []),
            ("endtag", "has_under"),
            ])

    def test_quotes_in_unquoted_attrs(self):
        # SF bug #436621
        """Be sure quotes in unquoted attributes are made part of the value"""
        self.check_events("<a href=foo'bar\"baz>", [
            ("starttag", "a", [("href", "foo'bar\"baz")]),
            ])

    def test_xhtml_empty_tag(self):
        """Handling of XHTML-style empty start tags"""
        self.check_events("<br />text<i></i>", [
            ("starttag", "br", []),
            ("data", "text"),
            ("starttag", "i", []),
            ("endtag", "i"),
            ])

    def test_processing_instruction_only(self):
        # A lone processing instruction yields a single "pi" event.
        self.check_events("<?processing instruction>", [
            ("pi", "processing instruction"),
            ])

    def test_bad_nesting(self):
        # Mis-nested end tags are reported in the order they appear.
        self.check_events("<a><b></a></b>", [
            ("starttag", "a", []),
            ("starttag", "b", []),
            ("endtag", "a"),
            ("endtag", "b"),
            ])

    def test_attr_syntax(self):
        # The same attributes written with different quoting and with
        # spaces, newlines or tabs around "=" must all parse identically.
        output = [
            ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", "e")])
            ]
        self.check_events("""<a b='v' c="v" d=v e>""", output)
        self.check_events("""<a b = 'v' c = "v" d = v e>""", output)
        self.check_events("""<a\nb\n=\n'v'\nc\n=\n"v"\nd\n=\nv\ne>""", output)
        self.check_events("""<a\tb\t=\t'v'\tc\t=\t"v"\td\t=\tv\te>""", output)

    def test_attr_values(self):
        # Whitespace inside quoted values is kept verbatim, and empty
        # quoted values are reported as empty strings.
        self.check_events("""<a b='xxx\n\txxx' c="yyy\t\nyyy" d='\txyz\n'>""",
                          [("starttag", "a", [("b", "xxx\n\txxx"),
                                              ("c", "yyy\t\nyyy"),
                                              ("d", "\txyz\n")])
                           ])
        self.check_events("""<a b='' c="">""", [
            ("starttag", "a", [("b", ""), ("c", "")]),
            ])

    def test_attr_funky_names(self):
        # Attribute names may contain ".", ":" and "-".
        self.check_events("""<a a.b='v' c:d=v e-f=v>""", [
            ("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]),
            ])

    def test_weird_starttags(self):
        # A "<" that opens a new tag implicitly closes the unfinished one.
        self.check_events("<a<a>", [
            ("starttag", "a", []),
            ("starttag", "a", []),
            ])
        self.check_events("</a<a>", [
            ("endtag", "a"),
            ("starttag", "a", []),
            ])

    def test_declaration_junk_chars(self):
        # A stray "$" inside a declaration must raise SGMLParseError.
        self.check_parse_error("<!DOCTYPE foo $ >")

    def test_get_starttag_text(self):
        # Newline and tab separators between attributes are accepted.
        s = """<foobar\none="1"\ttwo=2 >"""
        self.check_events(s, [
            ("starttag", "foobar", [("one", "1"), ("two", "2")]),
            ])

    def test_cdata_content(self):
        # Inside the literal <cdata> element, text that looks like a
        # comment or an entity reference ("&not-an-entity-ref;") must come
        # through as plain data; outside it, comments are parsed normally.
        s = ("<cdata> <!-- not a comment --> &not-an-entity-ref; </cdata>"
             "<notcdata> <!-- comment --> </notcdata>")
        # Shadows the class-level default collector on this instance only.
        self.collector = CDATAEventCollector
        self.check_events(s, [
            ("starttag", "cdata", []),
            ("data", " <!-- not a comment --> &not-an-entity-ref; "),
            ("endtag", "cdata"),
            ("starttag", "notcdata", []),
            ("data", " "),
            ("comment", " comment "),
            ("data", " "),
            ("endtag", "notcdata"),
            ])
        s = """<cdata> <not a='start tag'> </cdata>"""
        self.check_events(s, [
            ("starttag", "cdata", []),
            ("data", " <not a='start tag'> "),
            ("endtag", "cdata"),
            ])

    # XXX These tests have been disabled by prefixing their names with
    # an underscore.  The first two exercise outstanding bugs in the
    # sgmllib module, and the third exhibits questionable behavior
    # that needs to be carefully considered before changing it.

    def _test_starttag_end_boundary(self):
        # "<" and ">" inside a quoted attribute value must not end the tag.
        self.check_events("""<a b='<'>""", [("starttag", "a", [("b", "<")])])
        self.check_events("""<a b='>'>""", [("starttag", "a", [("b", ">")])])

    def _test_buffer_artefacts(self):
        # The same tag split across feed() calls at every position must
        # produce the same single start tag event.
        output = [("starttag", "a", [("b", "<")])]
        self.check_events(["<a b='<'>"], output)
        self.check_events(["<a ", "b='<'>"], output)
        self.check_events(["<a b", "='<'>"], output)
        self.check_events(["<a b=", "'<'>"], output)
        self.check_events(["<a b='<", "'>"], output)
        self.check_events(["<a b='<'", ">"], output)

        output = [("starttag", "a", [("b", ">")])]
        self.check_events(["<a b='>'>"], output)
        self.check_events(["<a ", "b='>'>"], output)
        self.check_events(["<a b", "='>'>"], output)
        self.check_events(["<a b=", "'>'>"], output)
        self.check_events(["<a b='>", "'>"], output)
        self.check_events(["<a b='>'", ">"], output)

    def _test_starttag_junk_chars(self):
        # Each of these malformed or truncated tags is expected to raise
        # SGMLParseError.  The failure message names the offending source,
        # so looping loses no diagnostic information.
        for source in ("<", "<>", "</$>", "</", "</a", "<$", "<$>", "<!",
                       "<a $>", "<a", "<a foo='bar'", "<a foo='bar",
                       "<a foo='>'", "<a foo='>", "<a foo=>"):
            self.check_parse_error(source)
# Module-level call: hands SGMLParserTestCase to test_support.run_unittest
# as soon as this file is executed or imported.
test_support.run_unittest(SGMLParserTestCase)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment