Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
16e6f7de
Unverified
Kaydet (Commit)
16e6f7de
authored
Mar 07, 2019
tarafından
Steve Dower
Kaydeden (commit)
GitHub
Mar 07, 2019
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
bpo-36216: Add check for characters in netloc that normalize to separators (GH-12201)
üst
1f58f4fa
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
61 additions
and
0 deletions
+61
-0
urllib.parse.rst
Doc/library/urllib.parse.rst
+18
-0
test_urlparse.py
Lib/test/test_urlparse.py
+23
-0
parse.py
Lib/urllib/parse.py
+17
-0
2019-03-06-09-38-40.bpo-36216.6q1m4a.rst
....d/next/Security/2019-03-06-09-38-40.bpo-36216.6q1m4a.rst
+3
-0
No files found.
Doc/library/urllib.parse.rst
Dosyayı görüntüle @
16e6f7de
...
...
@@ -124,6 +124,11 @@ or on combining URL components into a URL string.
Unmatched square brackets in the :attr:`netloc` attribute will raise a
:exc:`ValueError`.
Characters in the :attr:`netloc` attribute that decompose under NFKC
normalization (as used by the IDNA encoding) into any of ``/``, ``?``,
``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is
decomposed before parsing, no error will be raised.
.. versionchanged:: 3.2
Added IPv6 URL parsing capabilities.
...
...
@@ -136,6 +141,10 @@ or on combining URL components into a URL string.
Out-of-range port numbers now raise :exc:`ValueError`, instead of
returning :const:`None`.
.. versionchanged:: 3.8
Characters that affect netloc parsing under NFKC normalization will
now raise :exc:`ValueError`.
.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None)
...
...
@@ -259,10 +268,19 @@ or on combining URL components into a URL string.
Unmatched square brackets in the :attr:`netloc` attribute will raise a
:exc:`ValueError`.
Characters in the :attr:`netloc` attribute that decompose under NFKC
normalization (as used by the IDNA encoding) into any of ``/``, ``?``,
``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is
decomposed before parsing, no error will be raised.
.. versionchanged:: 3.6
Out-of-range port numbers now raise :exc:`ValueError`, instead of
returning :const:`None`.
.. versionchanged:: 3.8
Characters that affect netloc parsing under NFKC normalization will
now raise :exc:`ValueError`.
.. function:: urlunsplit(parts)
...
...
Lib/test/test_urlparse.py
Dosyayı görüntüle @
16e6f7de
import
sys
import
unicodedata
import
unittest
import
urllib.parse
...
...
@@ -994,6 +996,27 @@ class UrlParseTestCase(unittest.TestCase):
expected
.
append
(
name
)
self
.
assertCountEqual
(
urllib
.
parse
.
__all__
,
expected
)
def test_urlsplit_normalization(self):
    # Delimiters that must never show up in a netloc, whether written
    # literally or produced by Unicode normalization.  Every character
    # that decomposes into one of them has to make urlsplit() fail.
    forbidden = '/:#?@'
    forbidden_hex = set()
    for delim in forbidden:
        forbidden_hex.add('{:04X}'.format(ord(delim)))

    # Collect each non-ASCII code point whose decomposition mapping
    # mentions one of the forbidden delimiters.
    denorm_chars = []
    for codepoint in range(128, sys.maxunicode):
        candidate = chr(codepoint)
        mapping = set(unicodedata.decomposition(candidate).split())
        if not (forbidden_hex & mapping):
            continue
        if candidate in forbidden:
            continue
        denorm_chars.append(candidate)

    # Sanity check: the scan must find at least these known offenders.
    for known in ('\u2100', '\uFF03'):
        self.assertIn(known, denorm_chars)

    template = "{}://netloc{}false.netloc/path"
    for scheme in ["http", "https", "ftp"]:
        for bad in denorm_chars:
            url = template.format(scheme, bad)
            label = '{:04X}'.format(ord(bad))
            with self.subTest(url=url, char=label):
                with self.assertRaises(ValueError):
                    urllib.parse.urlsplit(url)
class
Utility_Tests
(
unittest
.
TestCase
):
"""Testcase to test the various utility functions in the urllib."""
...
...
Lib/urllib/parse.py
Dosyayı görüntüle @
16e6f7de
...
...
@@ -396,6 +396,21 @@ def _splitnetloc(url, start=0):
delim
=
min
(
delim
,
wdelim
)
# use earliest delim position
return
url
[
start
:
delim
],
url
[
delim
:]
# return (domain, rest)
def _checknetloc(netloc):
    """Reject a netloc that gains URL delimiters under NFKC normalization.

    IDNA encoding applies NFKC normalization, which can turn a single
    non-ASCII character into text containing '/', '?', '#', '@' or ':'
    (for example U+2100 expands to 'a/c').  Such a netloc would be split
    differently after normalization, so it is refused.

    Delimiters that are already present in *netloc* (userinfo '@', port
    ':' and so on) are legitimate and are ignored; only delimiters that
    appear as a result of normalization are treated as errors.

    Returns None when the netloc is acceptable.
    Raises ValueError when normalization introduces a delimiter.
    """
    # Empty and pure-ASCII netlocs cannot change under NFKC.
    if not netloc or netloc.isascii():
        return
    # looking for characters like \u2100 that expand to 'a/c'
    # IDNA uses NFKC equivalence, so normalize for this check
    import unicodedata
    # Drop the delimiters that were typed literally, keeping the
    # surrounding text, so they are not mistaken for ones produced by
    # normalization (e.g. a port after a hostname that merely composes).
    stripped = netloc
    for delim in '@:#?':
        stripped = stripped.replace(delim, '')
    normalized = unicodedata.normalize('NFKC', stripped)
    if stripped == normalized:
        return
    for c in '/?#@:':
        if c in normalized:
            raise ValueError("netloc '" + netloc + "' contains invalid " +
                             "characters under NFKC normalization")
def
urlsplit
(
url
,
scheme
=
''
,
allow_fragments
=
True
):
"""Parse a URL into 5 components:
<scheme>://<netloc>/<path>?<query>#<fragment>
...
...
@@ -424,6 +439,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
url
,
fragment
=
url
.
split
(
'#'
,
1
)
if
'?'
in
url
:
url
,
query
=
url
.
split
(
'?'
,
1
)
_checknetloc
(
netloc
)
v
=
SplitResult
(
'http'
,
netloc
,
url
,
query
,
fragment
)
_parse_cache
[
key
]
=
v
return
_coerce_result
(
v
)
...
...
@@ -447,6 +463,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
url
,
fragment
=
url
.
split
(
'#'
,
1
)
if
'?'
in
url
:
url
,
query
=
url
.
split
(
'?'
,
1
)
_checknetloc
(
netloc
)
v
=
SplitResult
(
scheme
,
netloc
,
url
,
query
,
fragment
)
_parse_cache
[
key
]
=
v
return
_coerce_result
(
v
)
...
...
Misc/NEWS.d/next/Security/2019-03-06-09-38-40.bpo-36216.6q1m4a.rst
0 → 100644
Dosyayı görüntüle @
16e6f7de
Changes urlsplit() to raise ValueError when the URL contains characters that
decompose under IDNA encoding (NFKC-normalization) into characters that
affect how the URL is parsed.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment