Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
7165d8b9
Kaydet (Commit)
7165d8b9
authored
Kas 07, 2013
tarafından
Ezio Melotti
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
#19480: HTMLParser now accepts all valid start-tag names as defined by the HTML5 standard.
üst
d5a2f0b3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
28 additions
and
13 deletions
+28
-13
parser.py
Lib/html/parser.py
+12
-9
test_htmlparser.py
Lib/test/test_htmlparser.py
+13
-4
NEWS
Misc/NEWS
+3
-0
No files found.
Lib/html/parser.py
Dosyayı görüntüle @
7165d8b9
...
...
@@ -23,16 +23,16 @@ charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
starttagopen
=
re
.
compile
(
'<[a-zA-Z]'
)
piclose
=
re
.
compile
(
'>'
)
commentclose
=
re
.
compile
(
r'--\s*>'
)
tagfind
=
re
.
compile
(
'([a-zA-Z][-.a-zA-Z0-9:_]*)(?:
\
s|/(?!>))*'
)
# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state
# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state
tagfind_tolerant
=
re
.
compile
(
'[a-zA-Z][^
\t\n\r\f
/>
\x00
]*'
)
# Note:
# 1) the strict attrfind isn't really strict, but we can't make it
# correctly strict without breaking backward compatibility;
# 2) if you change attrfind remember to update locatestarttagend too;
# 3) if you change attrfind and/or locatestarttagend the parser will
# 2) if you change tagfind/attrfind remember to update locatestarttagend too;
# 3) if you change tagfind/attrfind and/or locatestarttagend the parser will
# explode, so don't do it.
tagfind
=
re
.
compile
(
'([a-zA-Z][-.a-zA-Z0-9:_]*)(?:
\
s|/(?!>))*'
)
# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state
# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state
tagfind_tolerant
=
re
.
compile
(
'([a-zA-Z][^
\t\n\r\f
/>
\x00
]*)(?:
\
s|/(?!>))*'
)
attrfind
=
re
.
compile
(
r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?'
)
...
...
@@ -54,7 +54,7 @@ locatestarttagend = re.compile(r"""
\s* # trailing whitespace
"""
,
re
.
VERBOSE
)
locatestarttagend_tolerant
=
re
.
compile
(
r"""
<[a-zA-Z][
-.a-zA-Z0-9:_]*
# tag name
<[a-zA-Z][
^\t\n\r\f />\x00]*
# tag name
(?:[\s/]* # optional whitespace before attribute name
(?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name
(?:\s*=+\s* # value indicator
...
...
@@ -328,7 +328,10 @@ class HTMLParser(_markupbase.ParserBase):
# Now parse the data between i+1 and j into a tag and attrs
attrs
=
[]
match
=
tagfind
.
match
(
rawdata
,
i
+
1
)
if
self
.
strict
:
match
=
tagfind
.
match
(
rawdata
,
i
+
1
)
else
:
match
=
tagfind_tolerant
.
match
(
rawdata
,
i
+
1
)
assert
match
,
'unexpected call to parse_starttag()'
k
=
match
.
end
()
self
.
lasttag
=
tag
=
match
.
group
(
1
)
.
lower
()
...
...
@@ -440,7 +443,7 @@ class HTMLParser(_markupbase.ParserBase):
return
i
+
3
else
:
return
self
.
parse_bogus_comment
(
i
)
tagname
=
namematch
.
group
()
.
lower
()
tagname
=
namematch
.
group
(
1
)
.
lower
()
# consume and ignore other stuff between the name and the >
# Note: this is not 100% correct, since we might have things like
# </tag attr=">">, but looking for > after the name should cover
...
...
Lib/test/test_htmlparser.py
Dosyayı görüntüle @
7165d8b9
...
...
@@ -229,6 +229,11 @@ text
self
.
_parse_error
(
"<a foo='bar"
)
self
.
_parse_error
(
"<a foo='>'"
)
self
.
_parse_error
(
"<a foo='>"
)
self
.
_parse_error
(
"<a$>"
)
self
.
_parse_error
(
"<a$b>"
)
self
.
_parse_error
(
"<a$b/>"
)
self
.
_parse_error
(
"<a$b >"
)
self
.
_parse_error
(
"<a$b />"
)
def
test_valid_doctypes
(
self
):
# from http://www.w3.org/QA/2002/04/valid-dtd-list.html
...
...
@@ -368,8 +373,8 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
(
'starttag'
,
'html'
,
[(
'<html'
,
None
)]),
(
'data'
,
'te>>xt'
),
(
'entityref'
,
'a'
),
(
'data'
,
'<
<bc
'
),
(
'
endtag'
,
'a'
),
(
'data'
,
'<'
),
(
'
starttag'
,
'bc<'
,
[(
'a'
,
None
)]
),
(
'endtag'
,
'html'
),
(
'data'
,
'
\n
<img src="URL>'
),
(
'comment'
,
'/img'
),
...
...
@@ -380,8 +385,7 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
self
.
_run_check
(
"</$>"
,
[(
'comment'
,
'$'
)])
self
.
_run_check
(
"</"
,
[(
'data'
,
'</'
)])
self
.
_run_check
(
"</a"
,
[(
'data'
,
'</a'
)])
# XXX this might be wrong
self
.
_run_check
(
"<a<a>"
,
[(
'data'
,
'<a'
),
(
'starttag'
,
'a'
,
[])])
self
.
_run_check
(
"<a<a>"
,
[(
'starttag'
,
'a<a'
,
[])])
self
.
_run_check
(
"</a<a>"
,
[(
'endtag'
,
'a<a'
)])
self
.
_run_check
(
"<!"
,
[(
'data'
,
'<!'
)])
self
.
_run_check
(
"<a"
,
[(
'data'
,
'<a'
)])
...
...
@@ -389,6 +393,11 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
self
.
_run_check
(
"<a foo='bar"
,
[(
'data'
,
"<a foo='bar"
)])
self
.
_run_check
(
"<a foo='>'"
,
[(
'data'
,
"<a foo='>'"
)])
self
.
_run_check
(
"<a foo='>"
,
[(
'data'
,
"<a foo='>"
)])
self
.
_run_check
(
"<a$>"
,
[(
'starttag'
,
'a$'
,
[])])
self
.
_run_check
(
"<a$b>"
,
[(
'starttag'
,
'a$b'
,
[])])
self
.
_run_check
(
"<a$b/>"
,
[(
'startendtag'
,
'a$b'
,
[])])
self
.
_run_check
(
"<a$b >"
,
[(
'starttag'
,
'a$b'
,
[])])
self
.
_run_check
(
"<a$b />"
,
[(
'startendtag'
,
'a$b'
,
[])])
def
test_slashes_in_starttag
(
self
):
self
.
_run_check
(
'<a foo="var"/>'
,
[(
'startendtag'
,
'a'
,
[(
'foo'
,
'var'
)])])
...
...
Misc/NEWS
Dosyayı görüntüle @
7165d8b9
...
...
@@ -13,6 +13,9 @@ Core and Builtins
Library
-------
- Issue #19480: HTMLParser now accepts all valid start-tag names as defined
by the HTML5 standard.
- Issue #6157: Fixed tkinter.Text.debug(). Original patch by Guilherme Polo.
- Issue #6160: The bbox() method of tkinter.Spinbox now returns a tuple of
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment