Kaydet (Commit) ff847d1a authored tarafından Miss Islington (bot) Kaydeden (commit) Raymond Hettinger

bpo-31325: Fix usage of namedtuple in RobotFileParser.parse() (GH-4529) (#4533)

(cherry picked from commit 3df02dbc)
üst a645b23f
...@@ -69,10 +69,10 @@ structure of :file:`robots.txt` files, see http://www.robotstxt.org/orig.html. ...@@ -69,10 +69,10 @@ structure of :file:`robots.txt` files, see http://www.robotstxt.org/orig.html.
.. method:: request_rate(useragent) .. method:: request_rate(useragent)
Returns the contents of the ``Request-rate`` parameter from Returns the contents of the ``Request-rate`` parameter from
``robots.txt`` in the form of a :func:`~collections.namedtuple` ``robots.txt`` as a :term:`named tuple` ``RequestRate(requests, seconds)``.
``(requests, seconds)``. If there is no such parameter or it doesn't If there is no such parameter or it doesn't apply to the *useragent*
apply to the *useragent* specified or the ``robots.txt`` entry for this specified or the ``robots.txt`` entry for this parameter has invalid
parameter has invalid syntax, return ``None``. syntax, return ``None``.
.. versionadded:: 3.6 .. versionadded:: 3.6
......
...@@ -2,7 +2,6 @@ import io ...@@ -2,7 +2,6 @@ import io
import os import os
import unittest import unittest
import urllib.robotparser import urllib.robotparser
from collections import namedtuple
from test import support from test import support
from http.server import BaseHTTPRequestHandler, HTTPServer from http.server import BaseHTTPRequestHandler, HTTPServer
try: try:
...@@ -90,6 +89,10 @@ class BaseRequestRateTest(BaseRobotTest): ...@@ -90,6 +89,10 @@ class BaseRequestRateTest(BaseRobotTest):
self.parser.crawl_delay(agent), self.crawl_delay self.parser.crawl_delay(agent), self.crawl_delay
) )
if self.request_rate: if self.request_rate:
self.assertIsInstance(
self.parser.request_rate(agent),
urllib.robotparser.RequestRate
)
self.assertEqual( self.assertEqual(
self.parser.request_rate(agent).requests, self.parser.request_rate(agent).requests,
self.request_rate.requests self.request_rate.requests
...@@ -111,7 +114,7 @@ Disallow: /a%2fb.html ...@@ -111,7 +114,7 @@ Disallow: /a%2fb.html
Disallow: /%7ejoe/index.html Disallow: /%7ejoe/index.html
""" """
agent = 'figtree' agent = 'figtree'
request_rate = namedtuple('req_rate', 'requests seconds')(9, 30) request_rate = urllib.robotparser.RequestRate(9, 30)
crawl_delay = 3 crawl_delay = 3
good = [('figtree', '/foo.html')] good = [('figtree', '/foo.html')]
bad = ['/tmp', '/tmp.html', '/tmp/a.html', '/a%3cd.html', '/a%3Cd.html', bad = ['/tmp', '/tmp.html', '/tmp/a.html', '/a%3cd.html', '/a%3Cd.html',
...@@ -240,7 +243,7 @@ Crawl-delay: 1 ...@@ -240,7 +243,7 @@ Crawl-delay: 1
Request-rate: 3/15 Request-rate: 3/15
Disallow: /cyberworld/map/ Disallow: /cyberworld/map/
""" """
request_rate = namedtuple('req_rate', 'requests seconds')(3, 15) request_rate = urllib.robotparser.RequestRate(3, 15)
crawl_delay = 1 crawl_delay = 1
good = ['/', '/test.html'] good = ['/', '/test.html']
bad = ['/cyberworld/map/index.html'] bad = ['/cyberworld/map/index.html']
......
...@@ -16,6 +16,9 @@ import urllib.request ...@@ -16,6 +16,9 @@ import urllib.request
__all__ = ["RobotFileParser"] __all__ = ["RobotFileParser"]
RequestRate = collections.namedtuple("RequestRate", "requests seconds")
class RobotFileParser: class RobotFileParser:
""" This class provides a set of methods to read, parse and answer """ This class provides a set of methods to read, parse and answer
questions about a single robots.txt file. questions about a single robots.txt file.
...@@ -136,11 +139,7 @@ class RobotFileParser: ...@@ -136,11 +139,7 @@ class RobotFileParser:
# check if all values are sane # check if all values are sane
if (len(numbers) == 2 and numbers[0].strip().isdigit() if (len(numbers) == 2 and numbers[0].strip().isdigit()
and numbers[1].strip().isdigit()): and numbers[1].strip().isdigit()):
req_rate = collections.namedtuple('req_rate', entry.req_rate = RequestRate(int(numbers[0]), int(numbers[1]))
'requests seconds')
entry.req_rate = req_rate
entry.req_rate.requests = int(numbers[0])
entry.req_rate.seconds = int(numbers[1])
state = 2 state = 2
if state == 2: if state == 2:
self._add_entry(entry) self._add_entry(entry)
......
Fix wrong usage of :func:`collections.namedtuple` in
the :meth:`RobotFileParser.parse() <urllib.robotparser.RobotFileParser.parse>`
method.
Initial patch by Robin Wellner.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment