Commit 26430419, authored by Senthil Kumaran

Fix Issue11703 - urllib2.geturl() does not return correct url when the original…

Fix Issue #11703: urllib2's geturl() does not return the correct URL when the original URL contains a #fragment. Patch contributed by Santoso Wijaya.
parent 37805420
...@@ -171,6 +171,16 @@ class urlopen_HttpTests(unittest.TestCase): ...@@ -171,6 +171,16 @@ class urlopen_HttpTests(unittest.TestCase):
finally: finally:
self.unfakehttp() self.unfakehttp()
def test_url_fragment(self):
# Issue #11703: geturl() omits fragments in the original URL.
url = 'http://docs.python.org/library/urllib.html#OK'
self.fakehttp(b'Hello!')
try:
fp = urllib.request.urlopen(url)
self.assertEqual(fp.geturl(), url)
finally:
self.unfakehttp()
def test_read_bogus(self): def test_read_bogus(self):
# urlopen() should raise IOError for many error codes. # urlopen() should raise IOError for many error codes.
self.fakehttp(b'''HTTP/1.1 401 Authentication Required self.fakehttp(b'''HTTP/1.1 401 Authentication Required
......
...@@ -1024,6 +1024,15 @@ class HandlerTests(unittest.TestCase): ...@@ -1024,6 +1024,15 @@ class HandlerTests(unittest.TestCase):
o.open("http://www.example.com/") o.open("http://www.example.com/")
self.assertFalse(hh.req.has_header("Cookie")) self.assertFalse(hh.req.has_header("Cookie"))
def test_redirect_fragment(self):
redirected_url = 'http://www.example.com/index.html#OK\r\n\r\n'
hh = MockHTTPHandler(302, 'Location: ' + redirected_url)
hdeh = urllib.request.HTTPDefaultErrorHandler()
hrh = urllib.request.HTTPRedirectHandler()
o = build_test_opener(hh, hdeh, hrh)
fp = o.open('http://www.example.com')
self.assertEqual(fp.geturl(), redirected_url.strip())
def test_proxy(self): def test_proxy(self):
o = OpenerDirector() o = OpenerDirector()
ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128")) ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
...@@ -1339,12 +1348,16 @@ class RequestTests(unittest.TestCase): ...@@ -1339,12 +1348,16 @@ class RequestTests(unittest.TestCase):
req = Request("<URL:http://www.python.org>") req = Request("<URL:http://www.python.org>")
self.assertEqual("www.python.org", req.get_host()) self.assertEqual("www.python.org", req.get_host())
def test_urlwith_fragment(self): def test_url_fragment(self):
req = Request("http://www.python.org/?qs=query#fragment=true") req = Request("http://www.python.org/?qs=query#fragment=true")
self.assertEqual("/?qs=query", req.get_selector()) self.assertEqual("/?qs=query", req.get_selector())
req = Request("http://www.python.org/#fun=true") req = Request("http://www.python.org/#fun=true")
self.assertEqual("/", req.get_selector()) self.assertEqual("/", req.get_selector())
# Issue 11703: geturl() omits fragment in the original URL.
url = 'http://docs.python.org/library/urllib2.html#OK'
req = Request(url)
self.assertEqual(req.get_full_url(), url)
def test_main(verbose=None): def test_main(verbose=None):
from test import test_urllib2 from test import test_urllib2
......
...@@ -158,7 +158,7 @@ class OtherNetworkTests(unittest.TestCase): ...@@ -158,7 +158,7 @@ class OtherNetworkTests(unittest.TestCase):
req = urllib.request.Request(urlwith_frag) req = urllib.request.Request(urlwith_frag)
res = urllib.request.urlopen(req) res = urllib.request.urlopen(req)
self.assertEqual(res.geturl(), self.assertEqual(res.geturl(),
"http://docs.python.org/glossary.html") "http://docs.python.org/glossary.html#glossary")
def test_custom_headers(self): def test_custom_headers(self):
url = "http://www.example.com" url = "http://www.example.com"
......
...@@ -163,7 +163,7 @@ class Request: ...@@ -163,7 +163,7 @@ class Request:
origin_req_host=None, unverifiable=False): origin_req_host=None, unverifiable=False):
# unwrap('<URL:type://host/path>') --> 'type://host/path' # unwrap('<URL:type://host/path>') --> 'type://host/path'
self.full_url = unwrap(url) self.full_url = unwrap(url)
self.full_url, fragment = splittag(self.full_url) self.full_url, self.fragment = splittag(self.full_url)
self.data = data self.data = data
self.headers = {} self.headers = {}
self._tunnel_host = None self._tunnel_host = None
...@@ -202,7 +202,10 @@ class Request: ...@@ -202,7 +202,10 @@ class Request:
return self.data return self.data
def get_full_url(self): def get_full_url(self):
return self.full_url if self.fragment:
return '%s#%s' % (self.full_url, self.fragment)
else:
return self.full_url
def get_type(self): def get_type(self):
return self.type return self.type
...@@ -1106,7 +1109,7 @@ class AbstractHTTPHandler(BaseHandler): ...@@ -1106,7 +1109,7 @@ class AbstractHTTPHandler(BaseHandler):
except socket.error as err: except socket.error as err:
raise URLError(err) raise URLError(err)
r.url = req.full_url r.url = req.get_full_url()
# This line replaces the .msg attribute of the HTTPResponse # This line replaces the .msg attribute of the HTTPResponse
# with .headers, because urllib clients expect the response to # with .headers, because urllib clients expect the response to
# have the reason in .msg. It would be good to mark this # have the reason in .msg. It would be good to mark this
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment