Commit 3f8ab965, authored by Senthil Kumaran

Fix Issue6325 - robotparser to honor URLs with query strings.

parent 96a60ae9
......@@ -205,6 +205,17 @@ bad = ['/folder1/anotherfile.html']
RobotTest(13, doc, good, bad, agent="googlebot")
# 14. For issue #6325 (query string support)
doc = """
User-agent: *
Disallow: /some/path?name=value
"""
good = ['/some/path']
bad = ['/some/path?name=value']
RobotTest(14, doc, good, bad)
class NetworkTestCase(unittest.TestCase):
......
......@@ -129,8 +129,10 @@ class RobotFileParser:
return True
# search for given user agent matches
# the first match counts
url = urllib.parse.quote(
urllib.parse.urlparse(urllib.parse.unquote(url))[2])
parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url))
url = urllib.parse.urlunparse(('','',parsed_url.path,
parsed_url.params,parsed_url.query, parsed_url.fragment))
url = urllib.parse.quote(url)
if not url:
url = "/"
for entry in self.entries:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment