Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
fa3702dc
Kaydet (Commit)
fa3702dc
authored
Şub 10, 2012
tarafından
Ezio Melotti
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
#13960: HTMLParser is now able to handle broken comments when strict=False.
üst
5b14d732
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
57 additions
and
1 deletion
+57
-1
parser.py
Lib/html/parser.py
+23
-0
test_htmlparser.py
Lib/test/test_htmlparser.py
+30
-0
NEWS
Misc/NEWS
+4
-1
No files found.
Lib/html/parser.py
Dosyayı görüntüle @
fa3702dc
...
...
@@ -184,7 +184,17 @@ class HTMLParser(_markupbase.ParserBase):
elif
startswith
(
"<?"
,
i
):
k
=
self
.
parse_pi
(
i
)
elif
startswith
(
"<!"
,
i
):
# this might fail with things like <! not a comment > or
# <! -- space before '--' -->. When strict is True an
# error is raised, when it's False they will be considered
# as bogus comments and parsed (see parse_bogus_comment).
if
self
.
strict
:
k
=
self
.
parse_declaration
(
i
)
else
:
try
:
k
=
self
.
parse_declaration
(
i
)
except
HTMLParseError
:
k
=
self
.
parse_bogus_comment
(
i
)
elif
(
i
+
1
)
<
n
:
self
.
handle_data
(
"<"
)
k
=
i
+
1
...
...
@@ -256,6 +266,19 @@ class HTMLParser(_markupbase.ParserBase):
i
=
self
.
updatepos
(
i
,
n
)
self
.
rawdata
=
rawdata
[
i
:]
# Internal -- parse bogus comment, return end or -1 if not terminated
# see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state
def parse_bogus_comment(self, i, report=1):
    """Parse a bogus comment starting with '<!' at index i of self.rawdata.

    Everything between the leading '<!' and the next '>' is treated as the
    comment text.  When *report* is true that text is passed to
    self.handle_comment().

    Returns the index just past the closing '>', or -1 when no '>' is
    found at or after i + 2 (the comment is not terminated yet).

    If the data at i does not start with '<!', self.error() is called
    (NOTE(review): error() is defined outside this view; presumably it
    raises -- confirm).
    """
    rawdata = self.rawdata
    if rawdata[i:i+2] != '<!':
        # Name this method in the message, not parse_comment().
        self.error('unexpected call to parse_bogus_comment()')
    pos = rawdata.find('>', i+2)
    if pos == -1:
        return -1
    if report:
        # Skip the two-character '<!' opener; the '>' itself is excluded.
        self.handle_comment(rawdata[i+2:pos])
    return pos + 1
# Internal -- parse processing instr, return end or -1 if not terminated
def
parse_pi
(
self
,
i
):
rawdata
=
self
.
rawdata
...
...
Lib/test/test_htmlparser.py
Dosyayı görüntüle @
fa3702dc
...
...
@@ -323,6 +323,23 @@ DOCTYPE html [
(
"endtag"
,
element_lower
)],
collector
=
Collector
())
def test_comments(self):
    """Check that well-formed comments are reported as 'comment' events."""
    # The text each comment is expected to carry, in document order.
    comment_texts = (
        " I'm a valid comment ",
        'me too!',
        '--',
        '',
        '--I have many hyphens--',
        ' I have a > in the middle ',
        ' and I have -- in the middle! ',
    )
    html = ("<!-- I'm a valid comment -->"
            '<!--me too!-->'
            '<!------>'
            '<!---->'
            '<!----I have many hyphens---->'
            '<!-- I have a > in the middle -->'
            '<!-- and I have -- in the middle! -->')
    expected = [('comment', text) for text in comment_texts]
    self._run_check(html, expected)
def
test_condcoms
(
self
):
html
=
(
'<!--[if IE & !(lte IE 8)]>aren
\'
t<![endif]-->'
'<!--[if IE 8]>condcoms<![endif]-->'
...
...
@@ -426,6 +443,19 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
# see #12888
self
.
assertEqual
(
p
.
unescape
(
'{ '
*
1050
),
'{ '
*
1050
)
def test_broken_comments(self):
    """Check that malformed '<!...>' constructs are reported as comments."""
    # Each bogus comment should yield everything between '<!' and '>'.
    comment_texts = (
        ' not really a comment ',
        ' not a comment either --',
        ' -- close enough --',
        '!! another bogus comment !!!',
    )
    html = ('<! not really a comment >'
            '<! not a comment either -->'
            '<! -- close enough -->'
            '<!!! another bogus comment !!!>')
    expected = [('comment', text) for text in comment_texts]
    self._run_check(html, expected)
def
test_broken_condcoms
(
self
):
# these condcoms are missing the '--' after '<!' and before the '>'
html
=
(
'<![if !(IE)]>broken condcom<![endif]>'
...
...
Misc/NEWS
Dosyayı görüntüle @
fa3702dc
...
...
@@ -113,6 +113,9 @@ Core and Builtins
Library
-------
- Issue #13960: HTMLParser is now able to handle broken comments when
strict=False.
- Issue #9021: Add an introduction to the copy module documentation.
- Issue #6005: Examples in the socket library documentation use sendall, where
...
...
@@ -123,7 +126,7 @@ Library
- Issue #10881: Fix test_site failure with OS X framework builds.
- Issue #964437 Make IDLE help window non-modal.
- Issue #964437: Make IDLE help window non-modal.
Patch by Guilherme Polo and Roger Serwy.
- Issue #2945: Make the distutils upload command aware of bdist_rpm products.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment