test_urllib.py 62.7 KB
Newer Older
1
"""Regression tests for what was in Python 2's "urllib" module"""
2

3 4
import urllib.parse
import urllib.request
5
import urllib.error
6
import http.client
7
import email.message
8
import io
9
import unittest
10
from unittest.mock import patch
11
from test import support
12
import os
13 14 15 16
try:
    import ssl
except ImportError:
    ssl = None
17
import sys
18
import tempfile
19
from nturl2path import url2pathname, pathname2url
20

21
from base64 import b64encode
22
import collections
23

Senthil Kumaran's avatar
Senthil Kumaran committed
24

25 26 27 28 29 30 31
def hexescape(char):
    """Escape char as RFC 2396 specifies"""
    # format() zero-pads a single hex digit to two places and upper-cases,
    # matching the manual hex(ord(...)) padding this replaces.
    return "%{:02X}".format(ord(char))

32 33
# Shortcut for testing FancyURLopener
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object"""
    global _urlopener
    if proxies is not None:
        # An explicit proxy mapping always gets its own dedicated opener.
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        # Otherwise lazily create one shared opener and reuse it.
        if _urlopener is None:
            _urlopener = FancyURLopener()
        opener = _urlopener
    if data is None:
        return opener.open(url)
    return opener.open(url, data)

51

52 53 54 55 56 57 58
def FancyURLopener():
    """Build a FancyURLopener while swallowing its DeprecationWarning."""
    expected = ('FancyURLopener style of invoking requests is deprecated.',
                DeprecationWarning)
    # check_warnings also asserts the warning actually fires.
    with support.check_warnings(expected):
        return urllib.request.FancyURLopener()


59 60 61
def fakehttp(fakedata):
    """Return an HTTPConnection subclass that serves *fakedata* canned bytes
    instead of talking to a real socket."""

    class FakeSocket(io.BytesIO):
        # Reference count for makefile()-returned handles; the underlying
        # BytesIO is only really closed once every handle is closed.
        io_refs = 1

        def sendall(self, data):
            # Capture the outgoing request bytes for later verification.
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            self.io_refs += 1
            return self

        def read(self, amt=None):
            return b"" if self.closed else io.BytesIO.read(self, amt)

        def readline(self, length=None):
            return b"" if self.closed else io.BytesIO.readline(self, length)

        def close(self):
            self.io_refs -= 1
            if not self.io_refs:
                io.BytesIO.close(self)

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None

        def connect(self):
            # Hand the connection the canned response and expose the socket
            # on the class so tests can inspect it after the fact.
            self.sock = FakeSocket(self.fakedata)
            type(self).fakesock = self.sock

    FakeHTTPConnection.fakedata = fakedata
    return FakeHTTPConnection
96

97 98 99

class FakeHTTPMixin(object):
    """Mixin that temporarily swaps http.client.HTTPConnection for a fake."""

    def fakehttp(self, fakedata):
        # Remember the real class so unfakehttp() can put it back.
        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = fakehttp(fakedata)

    def unfakehttp(self):
        # Restore the genuine HTTPConnection saved by fakehttp().
        http.client.HTTPConnection = self._connection_class


107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
class FakeFTPMixin(object):
    """Mixin that temporarily replaces urllib.request.ftpwrapper with a
    no-op stand-in so no real FTP connection is attempted."""

    def fakeftp(self):
        class FakeFtpWrapper(object):
            def __init__(self,  user, passwd, host, port, dirs, timeout=None,
                     persistent=True):
                pass

            def retrfile(self, file, type):
                # Pretend the file exists but is empty.
                return io.BytesIO(), 0

            def close(self):
                pass

        # Stash the genuine wrapper so unfakeftp() can restore it.
        self._ftpwrapper_class = urllib.request.ftpwrapper
        urllib.request.ftpwrapper = FakeFtpWrapper

    def unfakeftp(self):
        urllib.request.ftpwrapper = self._ftpwrapper_class


127 128 129 130
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        # A with-statement guarantees the handle is closed even if write()
        # raises, replacing the old manual try/finally bookkeeping.
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()
        os.remove(support.TESTFN)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                         "object returned by urlopen() lacks %s attribute" %
                         attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        # A second readline() past EOF must yield an empty bytes object.
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        # file: URLs have no HTTP status, so getcode() must return None.
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        self.assertRaises(ValueError,urllib.request.urlopen,'./' + self.pathname)

209

Benjamin Peterson's avatar
Benjamin Peterson committed
210 211 212
class ProxyTests(unittest.TestCase):

    def setUp(self):
        # Track environment changes so tearDown() can undo them.
        self.env = support.EnvironmentVarGuard()
        # Start from a clean slate: drop every *proxy* environment variable.
        for name in list(os.environ):
            if 'proxy' in name.lower():
                self.env.unset(name)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.request.getproxies_environment()
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost', proxies['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com:1234')
        for host in ('anotherdomain.com',
                     'anotherdomain.com:8888',
                     'newdomain.com:1234'):
            self.assertTrue(urllib.request.proxy_bypass_environment(host))

    def test_proxy_cgi_ignore(self):
        try:
            self.env.set('HTTP_PROXY', 'http://somewhere:3128')
            proxies = urllib.request.getproxies_environment()
            self.assertEqual('http://somewhere:3128', proxies['http'])
            # With REQUEST_METHOD set we look like a CGI script, where
            # HTTP_PROXY is client-supplied and must be ignored.
            self.env.set('REQUEST_METHOD', 'GET')
            proxies = urllib.request.getproxies_environment()
            self.assertNotIn('http', proxies)
        finally:
            self.env.unset('REQUEST_METHOD')
            self.env.unset('HTTP_PROXY')

    def test_proxy_bypass_environment_host_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t')
        for host in ('localhost',
                     'LocalHost',                 # MixedCase
                     'LOCALHOST',                 # UPPERCASE
                     'newdomain.com:1234',
                     'foo.d.o.t',                 # issue 29142
                     'anotherdomain.com:8888',
                     'www.newdomain.com:1234'):
            self.assertTrue(bypass(host))
        for host in ('prelocalhost',
                     'newdomain.com',             # no port
                     'newdomain.com:1235'):       # wrong port
            self.assertFalse(bypass(host))
262

263

264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297
class ProxyTests_withOrderedEnv(unittest.TestCase):

    def setUp(self):
        # Variable insertion order matters for these tests, so swap in a
        # fake, initially empty, ordered environment.
        self._real_environ = os.environ
        os.environ = collections.OrderedDict()

    def tearDown(self):
        os.environ = self._real_environ

    def test_getproxies_environment_prefer_lowercase(self):
        bypass = urllib.request.proxy_bypass_environment
        getproxies = urllib.request.getproxies_environment
        # Test lowercase preference with removal
        os.environ['no_proxy'] = ''
        os.environ['No_Proxy'] = 'localhost'
        self.assertFalse(bypass('localhost'))
        self.assertFalse(bypass('arbitrary'))
        os.environ['http_proxy'] = ''
        os.environ['HTTP_PROXY'] = 'http://somewhere:3128'
        self.assertEqual({}, getproxies())
        # Test lowercase preference of proxy bypass and correct matching including ports
        os.environ['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        os.environ['No_Proxy'] = 'xyz.com'
        self.assertTrue(bypass('localhost'))
        self.assertTrue(bypass('noproxy.com:5678'))
        self.assertTrue(bypass('my.proxy:1234'))
        self.assertFalse(bypass('my.proxy'))
        self.assertFalse(bypass('arbitrary'))
        # Test lowercase preference with replacement
        os.environ['http_proxy'] = 'http://somewhere:3128'
        os.environ['Http_Proxy'] = 'http://somewhereelse:3128'
        self.assertEqual('http://somewhere:3128', getproxies()['http'])
Benjamin Peterson's avatar
Benjamin Peterson committed
298

299

300
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        """Serve a canned 200 response under HTTP version *ver* (bytes,
        e.g. b"1.1") and verify body, geturl() and getcode()."""
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            # Always restore the real HTTPConnection class.
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            fp = urllib.request.urlopen(url)
            self.assertEqual(fp.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            resp = urlopen("http://www.python.org")
            # No Content-Length and Connection is not keep-alive, so the
            # response object must plan to close the connection.
            self.assertTrue(resp.fp.will_close)
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        # NOTE: the triple-quoted bytes literal is the raw wire response;
        # its embedded newlines are part of the payload.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            self.assertRaises(OSError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise OSError for many error codes.
        # A redirect to a file: URL must be refused (security: an HTTP
        # server must not be able to make the client read local files).
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            msg = "Redirection to url 'file:"
            with self.assertRaisesRegex(urllib.error.HTTPError, msg):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        for i in range(FancyURLopener().maxtries):
            self.fakehttp(b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
''')
            try:
                self.assertRaises(urllib.error.HTTPError, urlopen,
                    "http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(OSError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        # The error must carry both the offending filename and a reason.
        self.assertTrue(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_file_notexists(self):
        fd, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        try:
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        # Once the file is gone the same URL must raise URLError.
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen(test_ftp_url)
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        # With MAXFTPCACHE patched to 0, opening an FTP URL must prune the
        # pre-seeded cache entry rather than grow the cache.
        self.fakeftp()
        try:
            urllib.request.ftpcache['test'] = urllib.request.ftpwrapper('user', 'pass', 'localhost', 21, [])
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://user:pass@python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            # Credentials are preserved in the URL reported back.
            self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            fakehttp_wrapper = http.client.HTTPConnection
            authorization = ("Authorization: Basic %s\r\n" %
                             b64encode(userpass.encode("ASCII")).decode("ASCII"))
            fp = urlopen(url)
            # The authorization header must be in place
            self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8"))
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(fp.geturl(), url)
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        # Instantiating URLopener directly must emit a DeprecationWarning.
        with support.check_warnings(('',DeprecationWarning)):
            urllib.request.URLopener()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_cafile_and_context(self):
        context = ssl.create_default_context()
        # Passing both cafile and context is contradictory and must raise.
        with support.check_warnings(('', DeprecationWarning)):
            with self.assertRaises(ValueError):
                urllib.request.urlopen(
                    "https://localhost", cafile="/nonexistent/path", context=context
                )
480

481

482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def setUp(self):
        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        open_url = urllib.request.urlopen
        self.text_url_resp = open_url(self.text_url)
        self.text_url_base64_resp = open_url(self.text_url_base64)
        self.image_url_resp = open_url(self.image_url)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        response = self.text_url_resp
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(response, attr),
                         "object returned by urlopen() lacks %s attribute" %
                         attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        base64_params = self.text_url_base64_resp.info().get_params()
        self.assertEqual(base64_params,
            [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
            str(len(self.image)))
        # A bare "data:," URL defaults to text/plain in US-ASCII.
        default_params = urllib.request.urlopen("data:,").info().get_params()
        self.assertEqual(default_params,
            [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
            self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        charset = dict(self.text_url_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_resp.read().decode(charset), self.text)

    def test_read_text_base64(self):
        charset = dict(self.text_url_base64_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_base64_resp.read().decode(charset),
            self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        # A data URL without the mandatory comma separator is malformed.
        with self.assertRaises(ValueError):
            urllib.request.urlopen('data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        with self.assertRaises(ValueError):
            urllib.request.urlopen('data:;base64,Cg=')

555

556
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to makes sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        # A with-statement closes the file even if write() raises; the old
        # try/finally double-close (with a bare except) is not needed.
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)

    def tearDown(self):
        # Delete the temporary files. Only OSError is suppressed: a file may
        # legitimately be missing, but a bare 'except' would also swallow
        # KeyboardInterrupt and real bugs.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for *filePath*, skipping the current test if
        the path cannot be encoded as UTF-8."""
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf-8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        self.registerFileForCleanUp(newFilePath)
        # os.fdopen takes ownership of newFd; the with-statement closes the
        # file object (and thus the descriptor) in all cases.
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        # Queue *fileName* for removal in tearDown().
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get an email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                  "made")
        with open(second_temp, 'rb') as f:
            text = f.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        def hooktester(block_count, block_read_size, file_size, count_holder=[0]):
            # The mutable default is intentional here: it is the call counter
            # shared across the hook invocations of one urlretrieve() call.
            self.assertIsInstance(block_count, int)
            self.assertIsInstance(block_read_size, int)
            self.assertIsInstance(file_size, int)
            self.assertEqual(block_count, count_holder[0])
            count_holder[0] = count_holder[0] + 1
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []
        def hooktester(block_count, block_read_size, file_size, _report=report):
            _report.append((block_count, block_read_size, file_size))
        srcFileName = self.createNewTempFile()
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
            support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read).
        report = []
        def hooktester(block_count, block_read_size, file_size, _report=report):
            _report.append((block_count, block_read_size, file_size))
        srcFileName = self.createNewTempFile(b"x" * 5)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
            support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][2], 5)
        self.assertEqual(report[1][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []
        def hooktester(block_count, block_read_size, file_size, _report=report):
            _report.append((block_count, block_read_size, file_size))
        srcFileName = self.createNewTempFile(b"x" * 8193)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
            support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][2], 8193)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[1][1], 8192)
        self.assertEqual(report[2][1], 8192)
694

695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736

class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    # Canned response whose Content-Length (100) exceeds the actual body,
    # which must make urlretrieve() raise ContentTooShortError.
    _truncated_response = b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
'''

    def test_short_content_raises_ContentTooShortError(self):
        self.fakehttp(self._truncated_response)

        def _reporthook(par1, par2, par3):
            pass

        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve('http://example.com/',
                                           reporthook=_reporthook)
            finally:
                self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        self.fakehttp(self._truncated_response)
        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve('http://example.com/')
            finally:
                self.unfakehttp()


737
class QuotingTests(unittest.TestCase):
738
    r"""Tests for urllib.quote() and urllib.quote_plus()
Tim Peters's avatar
Tim Peters committed
739

740
    According to RFC 3986 (Uniform Resource Identifiers), to escape a
741 742 743
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.
744 745

    The various character sets specified are:
Tim Peters's avatar
Tim Peters committed
746

747 748 749 750 751 752 753 754 755 756 757 758 759
    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped
Tim Peters's avatar
Tim Peters committed
760

761 762 763 764 765 766 767
    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_,.-"
        do_not_quote = '' .join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                 "abcdefghijklmnopqrstuvwxyz",
                                 "0123456789",
768
                                 "_.-~"])
769
        result = urllib.parse.quote(do_not_quote)
770
        self.assertEqual(do_not_quote, result,
771
                         "using quote(): %r != %r" % (do_not_quote, result))
772
        result = urllib.parse.quote_plus(do_not_quote)
773
        self.assertEqual(do_not_quote, result,
774
                        "using quote_plus(): %r != %r" % (do_not_quote, result))
775 776 777

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
778
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')
779 780 781 782

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
783
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
784
        self.assertEqual(quote_by_default, result,
785
                         "using quote(): %r != %r" % (quote_by_default, result))
786 787
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
788
        self.assertEqual(quote_by_default, result,
789
                         "using quote_plus(): %r != %r" %
790
                         (quote_by_default, result))
791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
808 809 810 811 812

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)] # For 0x00 - 0x1F
813
        should_quote.append(r'<>#%"{}|\^[]`')
814 815 816
        should_quote.append(chr(127)) # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
817
            result = urllib.parse.quote(char)
818
            self.assertEqual(hexescape(char), result,
819 820
                             "using quote(): "
                             "%s should be escaped to %s, not %s" %
821
                             (char, hexescape(char), result))
822
            result = urllib.parse.quote_plus(char)
823 824
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
Tim Peters's avatar
Tim Peters committed
825
                             "%s should be escapes to %s, not %s" %
826 827 828 829
                             (char, hexescape(char), result))
        del should_quote
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
830
        result = urllib.parse.quote(partial_quote)
831
        self.assertEqual(expected, result,
832
                         "using quote(): %r != %r" % (expected, result))
833
        result = urllib.parse.quote_plus(partial_quote)
834
        self.assertEqual(expected, result,
835
                         "using quote_plus(): %r != %r" % (expected, result))
836 837 838 839

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
840
        result = urllib.parse.quote(' ')
841
        self.assertEqual(result, hexescape(' '),
842
                         "using quote(): %r != %r" % (result, hexescape(' ')))
843
        result = urllib.parse.quote_plus(' ')
844
        self.assertEqual(result, '+',
845
                         "using quote_plus(): %r != +" % result)
846 847
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
848
        result = urllib.parse.quote(given)
849
        self.assertEqual(expect, result,
850
                         "using quote(): %r != %r" % (expect, result))
851
        expect = given.replace(' ', '+')
852
        result = urllib.parse.quote_plus(given)
853
        self.assertEqual(expect, result,
854
                         "using quote_plus(): %r != %r" % (expect, result))
855

856
    def test_quoting_plus(self):
857
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
858
                         'alpha%2Bbeta+gamma')
859
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
860
                         'alpha+beta+gamma')
861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                            encoding="latin-1")
        # quote_from_bytes should work the same
        result = urllib.parse.quote_from_bytes(given)
        self.assertEqual(expect, result,
                         "using quote_from_bytes(): %r != %r"
                         % (expect, result))

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded by with None (default)
        result = urllib.parse.quote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with Latin-1
        given = "\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"              # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded with Latin-1
        given = "\u6f22\u5b57"
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                                    encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        given = "\u6f22\u5b57"
        expect = "%3F%3F"                   # "??"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="replace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, Latin-1, with xmlcharref error handling
        given = "\u6f22\u5b57"
        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="xmlcharrefreplace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
925

926 927 928 929 930 931 932 933 934 935 936 937 938 939 940
    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = "\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.parse.quote_plus(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
        # Errors test for quote_plus
        given = "ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.parse.quote_plus(given, encoding="latin-1",
                                         errors="replace")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))

941

942 943
class UnquotingTests(unittest.TestCase):
    """Tests for unquote() and unquote_plus()
Tim Peters's avatar
Tim Peters committed
944

945 946 947 948 949 950 951 952 953 954
    See the doc string for quoting_Tests for details on quoting and such.

    """

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
955
            result = urllib.parse.unquote(given)
956
            self.assertEqual(expect, result,
957
                             "using unquote(): %r != %r" % (expect, result))
958
            result = urllib.parse.unquote_plus(given)
959
            self.assertEqual(expect, result,
960
                             "using unquote_plus(): %r != %r" %
961 962 963 964
                             (expect, result))
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        del escape_list
965
        result = urllib.parse.unquote(escape_string)
966 967 968
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters escaped: "
                         "%s" % result)
969 970
        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
971 972
        with support.check_warnings(('', BytesWarning), quiet=True):
            self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')
973

974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006
    def test_unquoting_badpercent(self):
        # Test unquoting on bad percent-escapes
        given = '%xab'
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))
        given = '%x'
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))
        given = '%'
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))
        # unquote_to_bytes
        given = '%xab'
        expect = bytes(given, 'ascii')
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        given = '%x'
        expect = bytes(given, 'ascii')
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        given = '%'
        expect = bytes(given, 'ascii')
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
1007 1008
        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
1009

1010 1011 1012 1013 1014 1015 1016 1017 1018
    def test_unquoting_mixed_case(self):
        # Test unquoting on mixed-case hex digits in the percent-escapes
        given = '%Ab%eA'
        expect = b'\xab\xea'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))

1019 1020 1021 1022 1023
    def test_unquoting_parts(self):
        # Make sure unquoting works when have non-quoted characters
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
1024
        result = urllib.parse.unquote(given)
1025
        self.assertEqual(expect, result,
1026
                         "using quote(): %r != %r" % (expect, result))
1027
        result = urllib.parse.unquote_plus(given)
1028
        self.assertEqual(expect, result,
1029
                         "using unquote_plus(): %r != %r" % (expect, result))
Tim Peters's avatar
Tim Peters committed
1030

1031 1032 1033 1034
    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus()
        given = "are+there+spaces..."
        expect = given
1035
        result = urllib.parse.unquote(given)
1036
        self.assertEqual(expect, result,
1037
                         "using unquote(): %r != %r" % (expect, result))
1038
        expect = given.replace('+', ' ')
1039
        result = urllib.parse.unquote_plus(given)
1040
        self.assertEqual(expect, result,
1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072
                         "using unquote_plus(): %r != %r" % (expect, result))

    def test_unquote_to_bytes(self):
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test on a string with unescaped non-ASCII characters
        # (Technically an invalid URI; expect those characters to be UTF-8
        # encoded).
        result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
        expect = b'\xe6\xbc\xa2\xc3\xbc'    # UTF-8 for "\u6f22\u00fc"
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input
        given = b'%A2%D8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input, with unescaped non-ASCII bytes
        # (Technically an invalid URI; expect those bytes to be preserved)
        given = b'%A2\xd8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
1073

1074
    def test_unquote_with_unicode(self):
1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130
        # Characters in the Latin-1 range, encoded with UTF-8
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        # Characters in the Latin-1 range, encoded with None (default)
        result = urllib.parse.unquote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in the Latin-1 range, encoded with Latin-1
        result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
                                      encoding="latin-1")
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in BMP, encoded with UTF-8
        given = "%E6%BC%A2%E5%AD%97"
        expect = "\u6f22\u5b57"             # "Kanji"
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence
        given = "%F3%B1"
        expect = "\ufffd"                   # Replacement character
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, replace errors
        result = urllib.parse.unquote(given, errors="replace")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, ignoring errors
        given = "%F3%B1"
        expect = ""
        result = urllib.parse.unquote(given, errors="ignore")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, UTF-8
        result = urllib.parse.unquote("\u6f22%C3%BC")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, Latin-1
        # (Note, the string contains non-Latin-1-representable characters)
        result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
1131

1132 1133 1134 1135 1136
class urlencode_Tests(unittest.TestCase):
    """Tests for urlencode()"""

    def help_inputtype(self, given, test_type):
        """Helper method for testing different input types.
Tim Peters's avatar
Tim Peters committed
1137

1138 1139 1140 1141
        'given' must lead to only the pairs:
            * 1st, 1
            * 2nd, 2
            * 3rd, 3
Tim Peters's avatar
Tim Peters committed
1142

1143 1144
        Test cannot assume anything about order.  Docs make no guarantee and
        have possible dictionary input.
Tim Peters's avatar
Tim Peters committed
1145

1146 1147
        """
        expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
1148
        result = urllib.parse.urlencode(given)
1149
        for expected in expect_somewhere:
1150
            self.assertIn(expected, result,
1151 1152 1153 1154 1155 1156 1157 1158
                         "testing %s: %s not found in %s" %
                         (test_type, expected, result))
        self.assertEqual(result.count('&'), 2,
                         "testing %s: expected 2 '&'s; got %s" %
                         (test_type, result.count('&')))
        amp_location = result.index('&')
        on_amp_left = result[amp_location - 1]
        on_amp_right = result[amp_location + 1]
1159
        self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180
                     "testing %s: '&' not located in proper place in %s" %
                     (test_type, result))
        self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
                         "testing %s: "
                         "unexpected number of characters: %s != %s" %
                         (test_type, len(result), (5 * 3) + 2))

    def test_using_mapping(self):
        # Test passing in a mapping object as an argument.
        self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
                            "using dict as input type")

    def test_using_sequence(self):
        # Test passing in a sequence of two-item sequences as an argument.
        self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
                            "using sequence of two-item tuples as input")

    def test_quoting(self):
        # Make sure keys and values are quoted using quote_plus()
        given = {"&":"="}
        expect = "%s=%s" % (hexescape('&'), hexescape('='))
1181
        result = urllib.parse.urlencode(given)
1182 1183 1184
        self.assertEqual(expect, result)
        given = {"key name":"A bunch of pluses"}
        expect = "key+name=A+bunch+of+pluses"
1185
        result = urllib.parse.urlencode(given)
1186 1187 1188 1189 1190
        self.assertEqual(expect, result)

    def test_doseq(self):
        # Test that passing True for 'doseq' parameter works correctly
        given = {'sequence':['1', '2', '3']}
1191 1192
        expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
        result = urllib.parse.urlencode(given)
1193
        self.assertEqual(expect, result)
1194
        result = urllib.parse.urlencode(given, True)
1195 1196
        for value in given["sequence"]:
            expect = "sequence=%s" % value
1197
            self.assertIn(expect, result)
1198 1199 1200
        self.assertEqual(result.count('&'), 2,
                         "Expected 2 '&'s, got %s" % result.count('&'))

1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212
    def test_empty_sequence(self):
        self.assertEqual("", urllib.parse.urlencode({}))
        self.assertEqual("", urllib.parse.urlencode([]))

    def test_nonstring_values(self):
        self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
        self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))

    def test_nonstring_seq_values(self):
        self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
        self.assertEqual("a=None&a=a",
                         urllib.parse.urlencode({"a": [None, "a"]}, True))
1213
        data = collections.OrderedDict([("a", 1), ("b", 1)])
1214
        self.assertEqual("a=a&a=b",
1215
                         urllib.parse.urlencode({"a": data}, True))
1216

1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326
    def test_urlencode_encoding(self):
        # ASCII encoding. Expect %3F with errors="replace'
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # Default is UTF-8 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

        # Latin-1 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_encoding_doseq(self):
        # ASCII Encoding. Expect %3F with errors="replace'
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, doseq=True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # ASCII Encoding. On a sequence of values.
        given = (("\u00a0", (1, "\u00c1")),)
        expect = '%3F=1&%3F=%3F'
        result = urllib.parse.urlencode(given, True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # Utf-8
        given = (("\u00a0", "\u00c1"),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%C2%A0=42&%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # latin-1
        given = (("\u00a0", "\u00c1"),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%A0=42&%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_bytes(self):
        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0%24=%C1%24'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # Sequence of values
        given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
        expect = '%A0%24=42&%A0%24=%C1%24'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

    def test_urlencode_encoding_safe_parameter(self):

        # Send '$' (\x24) as safe character
        # Default utf-8 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, doseq=True, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        # Safe parameter in sequence
        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$")
        self.assertEqual(expect, result)

        # Test all above in latin-1 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$",
                                        encoding="latin-1")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0$=%C1$'
        result = urllib.parse.urlencode(given, doseq=True, safe=":$",
                                        encoding="latin-1")

        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$",
                                        encoding="latin-1")
        self.assertEqual(expect, result)

1327 1328 1329 1330 1331 1332 1333
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (result, expected_url))
        result = urllib.request.url2pathname(expected_url)
        # Fixed message typo: was "url2pathame()".
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (result, expected_path))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the urllib.url2path function.')
    def test_ntpath(self):
        # Windows drive-letter URLs, in both 'C:' and legacy 'C|' spellings,
        # must all collapse to the same local path.
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.request.url2pathname(url)
            # Fixed message typo: had a doubled dot, "urllib.request..".
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))

1387 1388 1389
class Utility_Tests(unittest.TestCase):
    """Exercise the small utility helpers exposed by urllib.request."""

    def test_thishost(self):
        # thishost() must report the local host's information as a tuple.
        host_info = urllib.request.thishost()
        self.assertIsInstance(host_info, tuple)


class URLopener_Tests(unittest.TestCase):
    """Exercise URLopener.open() and the quoting it applies to URLs."""

    def test_quoted_open(self):
        # A scheme-specific open_<scheme> handler that simply echoes the
        # URL handed to it, exposing exactly what quoting open() applied.
        class DummyURLopener(urllib.request.URLopener):
            def open_spam(self, url):
                return url

        deprecation = ('DummyURLopener style of invoking requests is deprecated.',
                       DeprecationWarning)
        with support.check_warnings(deprecation):
            # A space in the path must be percent-encoded.
            opened = DummyURLopener().open('spam://example/ /')
            self.assertEqual(opened, '//example/%20/')

            # test the safe characters are not quoted by urlopen
            opened = DummyURLopener().open(
                "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
            self.assertEqual(opened,
                "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
# Just commented them out.
# Can't really tell why keep failing in windows and sparc.
# Everywhere else they work ok, but on those machines, sometimes
# fail in one of the tests, sometimes in other. I have a linux, and
# the tests go ok.
# If anybody has one of the problematic environments, please help!
# .   Facundo
#
# def server(evt):
#     import socket, time
#     serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
#     serv.settimeout(3)
#     serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
#     serv.bind(("", 9093))
#     serv.listen()
#     try:
#         conn, addr = serv.accept()
#         conn.send("1 Hola mundo\n")
#         cantdata = 0
#         while cantdata < 13:
#             data = conn.recv(13-cantdata)
#             cantdata += len(data)
#             time.sleep(.3)
#         conn.send("2 No more lines\n")
#         conn.close()
#     except socket.timeout:
#         pass
#     finally:
#         serv.close()
#         evt.set()
#
# class FTPWrapperTests(unittest.TestCase):
#
#     def setUp(self):
#         import ftplib, time, threading
#         ftplib.FTP.port = 9093
#         self.evt = threading.Event()
#         threading.Thread(target=server, args=(self.evt,)).start()
#         time.sleep(.1)
#
#     def tearDown(self):
#         self.evt.wait()
#
#     def testBasic(self):
#         # connects
#         ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
#         ftp.close()
#
#     def testTimeoutNone(self):
#         # global default timeout is ignored
#         import socket
#         self.assertIsNone(socket.getdefaulttimeout())
#         socket.setdefaulttimeout(30)
#         try:
#             ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
#         finally:
#             socket.setdefaulttimeout(None)
#         self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
#         ftp.close()
#
#     def testTimeoutDefault(self):
#         # global default timeout is used
#         import socket
#         self.assertIsNone(socket.getdefaulttimeout())
#         socket.setdefaulttimeout(30)
#         try:
#             ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
#         finally:
#             socket.setdefaulttimeout(None)
#         self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
#         ftp.close()
#
#     def testTimeoutValue(self):
#         ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
#                                 timeout=30)
#         self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
#         ftp.close()

class RequestTests(unittest.TestCase):
    """Unit tests for urllib.request.Request."""

    def test_default_values(self):
        # With no data payload the HTTP verb defaults to GET ...
        req = urllib.request.Request("http://www.python.org")
        self.assertEqual(req.get_method(), 'GET')
        # ... while supplying a payload (even an empty one) implies POST.
        req = urllib.request.Request("http://www.python.org", {})
        self.assertEqual(req.get_method(), 'POST')

    def test_with_method_arg(self):
        # An explicit method= argument overrides the data-based heuristic.
        req = urllib.request.Request("http://www.python.org", method='HEAD')
        self.assertEqual(req.method, 'HEAD')
        self.assertEqual(req.get_method(), 'HEAD')
        req = urllib.request.Request("http://www.python.org", {}, method='HEAD')
        self.assertEqual(req.method, 'HEAD')
        self.assertEqual(req.get_method(), 'HEAD')
        # The method attribute may also be reassigned after construction.
        req = urllib.request.Request("http://www.python.org", method='GET')
        self.assertEqual(req.get_method(), 'GET')
        req.method = 'HEAD'
        self.assertEqual(req.get_method(), 'HEAD')


class URL2PathNameTests(unittest.TestCase):
    """nturl2path.url2pathname(): URL -> Windows-path conversions."""

    def test_converting_drive_letter(self):
        # Both ':' and the archaic '|' drive separators are accepted.
        self.assertEqual(url2pathname("///C|"), 'C:')
        self.assertEqual(url2pathname("///C:"), 'C:')
        self.assertEqual(url2pathname("///C|/"), 'C:\\')

    def test_converting_when_no_drive_letter(self):
        # cannot end a raw string in \
        self.assertEqual(url2pathname("///C/test/"), r'\\\C\test' '\\')
        self.assertEqual(url2pathname("////C/test/"), r'\\C\test' '\\')

    def test_simple_compare(self):
        converted = url2pathname("///C|/foo/bar/spam.foo")
        self.assertEqual(converted, r'C:\foo\bar\spam.foo')

    def test_non_ascii_drive_letter(self):
        # Only a single ASCII letter is a valid drive designator.
        with self.assertRaises(IOError):
            url2pathname("///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        # pathname2url() followed by url2pathname() must be the identity.
        for path in ('C:',
                     r'\\\C\test' '\\',
                     r'C:\foo\bar\spam.foo'):
            self.assertEqual(url2pathname(pathname2url(path)), path)

class PathName2URLTests(unittest.TestCase):
    """nturl2path.pathname2url(): Windows-path -> URL conversions."""

    def test_converting_drive_letter(self):
        # A drive root maps to three leading slashes; the trailing
        # backslash on the bare drive is dropped.
        self.assertEqual(pathname2url("C:"), '///C:')
        self.assertEqual(pathname2url("C:\\"), '///C:')

    def test_converting_when_no_drive_letter(self):
        # Each leading backslash becomes a slash, plus the UNC prefix.
        self.assertEqual(pathname2url(r"\\\folder\test" "\\"),
                         '/////folder/test/')
        self.assertEqual(pathname2url(r"\\folder\test" "\\"),
                         '////folder/test/')
        self.assertEqual(pathname2url(r"\folder\test" "\\"),
                         '/folder/test/')

    def test_simple_compare(self):
        converted = pathname2url(r'C:\foo\bar\spam.foo')
        self.assertEqual(converted, "///C:/foo/bar/spam.foo")

    def test_long_drive_letter(self):
        # Multi-letter "drives" are rejected.
        with self.assertRaises(IOError):
            pathname2url("XX:\\")

    def test_roundtrip_pathname2url(self):
        # url2pathname() followed by pathname2url() must be the identity.
        for url in ('///C:',
                    '/////folder/test/',
                    '///C:/foo/bar/spam.foo'):
            self.assertEqual(pathname2url(url2pathname(url)), url)
if __name__ == '__main__':
    # Run this module's test cases when executed as a script.
    unittest.main()