Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
ff847d1a
Kaydet (Commit)
ff847d1a
authored
Kas 23, 2017
tarafından
Miss Islington (bot)
Kaydeden (commit)
Raymond Hettinger
Kas 23, 2017
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
bpo-31325: Fix usage of namedtuple in RobotFileParser.parse() (GH-4529) (#4533)
(cherry picked from commit
3df02dbc
)
üst
a645b23f
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
19 additions
and
12 deletions
+19
-12
urllib.robotparser.rst
Doc/library/urllib.robotparser.rst
+4
-4
test_robotparser.py
Lib/test/test_robotparser.py
+6
-3
robotparser.py
Lib/urllib/robotparser.py
+4
-5
2017-11-23-22-12-11.bpo-31325.8jAUxN.rst
...S.d/next/Library/2017-11-23-22-12-11.bpo-31325.8jAUxN.rst
+5
-0
No files found.
Doc/library/urllib.robotparser.rst
Dosyayı görüntüle @
ff847d1a
...
@@ -69,10 +69,10 @@ structure of :file:`robots.txt` files, see http://www.robotstxt.org/orig.html.
...
@@ -69,10 +69,10 @@ structure of :file:`robots.txt` files, see http://www.robotstxt.org/orig.html.
.. method:: request_rate(useragent)
.. method:: request_rate(useragent)
Returns the contents of the ``Request-rate`` parameter from
Returns the contents of the ``Request-rate`` parameter from
``robots.txt``
in the form of a :func:`~collections.namedtuple`
``robots.txt``
as a :term:`named tuple` ``RequestRate(requests, seconds)``.
``(requests, seconds)``. If there is no such parameter or it doesn't
If there is no such parameter or it doesn't apply to the *useragent*
apply to the *useragent* specified or the ``robots.txt`` entry for this
specified or the ``robots.txt`` entry for this parameter has invalid
parameter has invalid
syntax, return ``None``.
syntax, return ``None``.
.. versionadded:: 3.6
.. versionadded:: 3.6
...
...
Lib/test/test_robotparser.py
Dosyayı görüntüle @
ff847d1a
...
@@ -2,7 +2,6 @@ import io
...
@@ -2,7 +2,6 @@ import io
import
os
import
os
import
unittest
import
unittest
import
urllib.robotparser
import
urllib.robotparser
from
collections
import
namedtuple
from
test
import
support
from
test
import
support
from
http.server
import
BaseHTTPRequestHandler
,
HTTPServer
from
http.server
import
BaseHTTPRequestHandler
,
HTTPServer
try
:
try
:
...
@@ -90,6 +89,10 @@ class BaseRequestRateTest(BaseRobotTest):
...
@@ -90,6 +89,10 @@ class BaseRequestRateTest(BaseRobotTest):
self
.
parser
.
crawl_delay
(
agent
),
self
.
crawl_delay
self
.
parser
.
crawl_delay
(
agent
),
self
.
crawl_delay
)
)
if
self
.
request_rate
:
if
self
.
request_rate
:
self
.
assertIsInstance
(
self
.
parser
.
request_rate
(
agent
),
urllib
.
robotparser
.
RequestRate
)
self
.
assertEqual
(
self
.
assertEqual
(
self
.
parser
.
request_rate
(
agent
)
.
requests
,
self
.
parser
.
request_rate
(
agent
)
.
requests
,
self
.
request_rate
.
requests
self
.
request_rate
.
requests
...
@@ -111,7 +114,7 @@ Disallow: /a%2fb.html
...
@@ -111,7 +114,7 @@ Disallow: /a%2fb.html
Disallow: /
%7
ejoe/index.html
Disallow: /
%7
ejoe/index.html
"""
"""
agent
=
'figtree'
agent
=
'figtree'
request_rate
=
namedtuple
(
'req_rate'
,
'requests seconds'
)
(
9
,
30
)
request_rate
=
urllib
.
robotparser
.
RequestRate
(
9
,
30
)
crawl_delay
=
3
crawl_delay
=
3
good
=
[(
'figtree'
,
'/foo.html'
)]
good
=
[(
'figtree'
,
'/foo.html'
)]
bad
=
[
'/tmp'
,
'/tmp.html'
,
'/tmp/a.html'
,
'/a
%3
cd.html'
,
'/a
%3
Cd.html'
,
bad
=
[
'/tmp'
,
'/tmp.html'
,
'/tmp/a.html'
,
'/a
%3
cd.html'
,
'/a
%3
Cd.html'
,
...
@@ -240,7 +243,7 @@ Crawl-delay: 1
...
@@ -240,7 +243,7 @@ Crawl-delay: 1
Request-rate: 3/15
Request-rate: 3/15
Disallow: /cyberworld/map/
Disallow: /cyberworld/map/
"""
"""
request_rate
=
namedtuple
(
'req_rate'
,
'requests seconds'
)
(
3
,
15
)
request_rate
=
urllib
.
robotparser
.
RequestRate
(
3
,
15
)
crawl_delay
=
1
crawl_delay
=
1
good
=
[
'/'
,
'/test.html'
]
good
=
[
'/'
,
'/test.html'
]
bad
=
[
'/cyberworld/map/index.html'
]
bad
=
[
'/cyberworld/map/index.html'
]
...
...
Lib/urllib/robotparser.py
Dosyayı görüntüle @
ff847d1a
...
@@ -16,6 +16,9 @@ import urllib.request
...
@@ -16,6 +16,9 @@ import urllib.request
__all__
=
[
"RobotFileParser"
]
__all__
=
[
"RobotFileParser"
]
RequestRate
=
collections
.
namedtuple
(
"RequestRate"
,
"requests seconds"
)
class
RobotFileParser
:
class
RobotFileParser
:
""" This class provides a set of methods to read, parse and answer
""" This class provides a set of methods to read, parse and answer
questions about a single robots.txt file.
questions about a single robots.txt file.
...
@@ -136,11 +139,7 @@ class RobotFileParser:
...
@@ -136,11 +139,7 @@ class RobotFileParser:
# check if all values are sane
# check if all values are sane
if
(
len
(
numbers
)
==
2
and
numbers
[
0
]
.
strip
()
.
isdigit
()
if
(
len
(
numbers
)
==
2
and
numbers
[
0
]
.
strip
()
.
isdigit
()
and
numbers
[
1
]
.
strip
()
.
isdigit
()):
and
numbers
[
1
]
.
strip
()
.
isdigit
()):
req_rate
=
collections
.
namedtuple
(
'req_rate'
,
entry
.
req_rate
=
RequestRate
(
int
(
numbers
[
0
]),
int
(
numbers
[
1
]))
'requests seconds'
)
entry
.
req_rate
=
req_rate
entry
.
req_rate
.
requests
=
int
(
numbers
[
0
])
entry
.
req_rate
.
seconds
=
int
(
numbers
[
1
])
state
=
2
state
=
2
if
state
==
2
:
if
state
==
2
:
self
.
_add_entry
(
entry
)
self
.
_add_entry
(
entry
)
...
...
Misc/NEWS.d/next/Library/2017-11-23-22-12-11.bpo-31325.8jAUxN.rst
0 → 100644
Dosyayı görüntüle @
ff847d1a
Fix wrong usage of :func:`collections.namedtuple` in
the :meth:`RobotFileParser.parse() <urllib.robotparser.RobotFileParser.parse>`
method.
Initial patch by Robin Wellner.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment