Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
46495182
Kaydet (Commit)
46495182
authored
Haz 24, 2012
tarafından
Ezio Melotti
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
#15156: HTMLParser now uses the new "html.entities.html5" dictionary.
üst
a504a7a7
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
23 additions
and
22 deletions
+23
-22
html.entities.rst
Doc/library/html.entities.rst
+0
-4
parser.py
Lib/html/parser.py
+15
-17
test_htmlparser.py
Lib/test/test_htmlparser.py
+6
-1
NEWS
Misc/NEWS
+2
-0
No files found.
Doc/library/html.entities.rst
Dosyayı görüntüle @
46495182
...
@@ -11,10 +11,6 @@
...
@@ -11,10 +11,6 @@
This module defines four dictionaries, :data:`html5`,
This module defines four dictionaries, :data:`html5`,
:data:`name2codepoint`, :data:`codepoint2name`, and :data:`entitydefs`.
:data:`name2codepoint`, :data:`codepoint2name`, and :data:`entitydefs`.
:data:`entitydefs` is used to provide the :attr:`entitydefs`
attribute of the :class:`html.parser.HTMLParser` class. The definition provided
here contains all the entities defined by XHTML 1.0 that can be handled using
simple textual substitution in the Latin-1 character set (ISO-8859-1).
.. data:: html5
.. data:: html5
...
...
Lib/html/parser.py
Dosyayı görüntüle @
46495182
...
@@ -500,7 +500,6 @@ class HTMLParser(_markupbase.ParserBase):
...
@@ -500,7 +500,6 @@ class HTMLParser(_markupbase.ParserBase):
self
.
error
(
"unknown declaration:
%
r"
%
(
data
,))
self
.
error
(
"unknown declaration:
%
r"
%
(
data
,))
# Internal -- helper to remove special character quoting
# Internal -- helper to remove special character quoting
entitydefs
=
None
def
unescape
(
self
,
s
):
def
unescape
(
self
,
s
):
if
'&'
not
in
s
:
if
'&'
not
in
s
:
return
s
return
s
...
@@ -510,24 +509,23 @@ class HTMLParser(_markupbase.ParserBase):
...
@@ -510,24 +509,23 @@ class HTMLParser(_markupbase.ParserBase):
if
s
[
0
]
==
"#"
:
if
s
[
0
]
==
"#"
:
s
=
s
[
1
:]
s
=
s
[
1
:]
if
s
[
0
]
in
[
'x'
,
'X'
]:
if
s
[
0
]
in
[
'x'
,
'X'
]:
c
=
int
(
s
[
1
:],
16
)
c
=
int
(
s
[
1
:]
.
rstrip
(
';'
)
,
16
)
else
:
else
:
c
=
int
(
s
)
c
=
int
(
s
.
rstrip
(
';'
)
)
return
chr
(
c
)
return
chr
(
c
)
except
ValueError
:
except
ValueError
:
return
'&#'
+
s
+
';'
return
'&#'
+
s
else
:
else
:
# Cannot use name2codepoint directly, because HTMLParser
from
html.entities
import
html5
# supports apos, which is not part of HTML 4
if
s
in
html5
:
import
html.entities
return
html5
[
s
]
if
HTMLParser
.
entitydefs
is
None
:
elif
s
.
endswith
(
';'
):
entitydefs
=
HTMLParser
.
entitydefs
=
{
'apos'
:
"'"
}
return
'&'
+
s
for
k
,
v
in
html
.
entities
.
name2codepoint
.
items
():
for
x
in
range
(
2
,
len
(
s
)):
entitydefs
[
k
]
=
chr
(
v
)
if
s
[:
x
]
in
html5
:
try
:
return
html5
[
s
[:
x
]]
+
s
[
x
:]
return
self
.
entitydefs
[
s
]
else
:
except
KeyError
:
return
'&'
+
s
return
'&'
+
s
+
';'
return
re
.
sub
(
r"&(#?[xX]?(?:[0-9a-fA-F]+;|\w{1,32};?))"
,
return
re
.
sub
(
r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));"
,
replaceEntities
,
s
,
flags
=
re
.
ASCII
)
replaceEntities
,
s
,
flags
=
re
.
ASCII
)
Lib/test/test_htmlparser.py
Dosyayı görüntüle @
46495182
...
@@ -456,7 +456,7 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
...
@@ -456,7 +456,7 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
self
.
_run_check
(
'<form action="/xxx.php?a=1&amp;b=2&amp", '
self
.
_run_check
(
'<form action="/xxx.php?a=1&amp;b=2&amp", '
'method="post">'
,
[
'method="post">'
,
[
(
'starttag'
,
'form'
,
(
'starttag'
,
'form'
,
[(
'action'
,
'/xxx.php?a=1&b=2&
amp
'
),
[(
'action'
,
'/xxx.php?a=1&b=2&'
),
(
','
,
None
),
(
'method'
,
'post'
)])])
(
','
,
None
),
(
'method'
,
'post'
)])])
def
test_weird_chars_in_unquoted_attribute_values
(
self
):
def
test_weird_chars_in_unquoted_attribute_values
(
self
):
...
@@ -541,6 +541,11 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
...
@@ -541,6 +541,11 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
self
.
assertEqual
(
p
.
unescape
(
'&#0038;'
),
'&'
)
self
.
assertEqual
(
p
.
unescape
(
'&#0038;'
),
'&'
)
# see #12888
# see #12888
self
.
assertEqual
(
p
.
unescape
(
'&#123; '
*
1050
),
'{ '
*
1050
)
self
.
assertEqual
(
p
.
unescape
(
'&#123; '
*
1050
),
'{ '
*
1050
)
# see #15156
self
.
assertEqual
(
p
.
unescape
(
'&Eacute;ric&Eacute;ric'
'&alphacentauri&alpha;centauri'
),
'ÉricÉric&alphacentauriαcentauri'
)
self
.
assertEqual
(
p
.
unescape
(
'&co;'
),
'&co;'
)
def
test_broken_comments
(
self
):
def
test_broken_comments
(
self
):
html
=
(
'<! not really a comment >'
html
=
(
'<! not really a comment >'
...
...
Misc/NEWS
Dosyayı görüntüle @
46495182
...
@@ -76,6 +76,8 @@ Library
...
@@ -76,6 +76,8 @@ Library
It is used automatically on platforms supporting the necessary os.openat()
It is used automatically on platforms supporting the necessary os.openat()
and os.unlinkat() functions. Main code by Martin von Löwis.
and os.unlinkat() functions. Main code by Martin von Löwis.
- Issue #15156: HTMLParser now uses the new "html.entities.html5" dictionary.
- Issue #11113: add a new "html5" dictionary containing the named character
- Issue #11113: add a new "html5" dictionary containing the named character
references defined by the HTML5 standard and the equivalent Unicode
references defined by the HTML5 standard and the equivalent Unicode
character(s) to the html.entities module.
character(s) to the html.entities module.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment