Kaydet (Commit) df204be9 authored tarafından Antoine Pitrou's avatar Antoine Pitrou

Issue #16423: urllib.request now has support for ``data:`` URLs.

Patch by Mathias Panzenböck.
üst a833e0d8
...@@ -121,7 +121,7 @@ The :mod:`urllib.request` module defines the following functions: ...@@ -121,7 +121,7 @@ The :mod:`urllib.request` module defines the following functions:
instances of them or subclasses of them: :class:`ProxyHandler`, instances of them or subclasses of them: :class:`ProxyHandler`,
:class:`UnknownHandler`, :class:`HTTPHandler`, :class:`HTTPDefaultErrorHandler`, :class:`UnknownHandler`, :class:`HTTPHandler`, :class:`HTTPDefaultErrorHandler`,
:class:`HTTPRedirectHandler`, :class:`FTPHandler`, :class:`FileHandler`, :class:`HTTPRedirectHandler`, :class:`FTPHandler`, :class:`FileHandler`,
:class:`HTTPErrorProcessor`. :class:`HTTPErrorProcessor`, :class:`DataHandler`.
If the Python installation has SSL support (i.e., if the :mod:`ssl` module If the Python installation has SSL support (i.e., if the :mod:`ssl` module
can be imported), :class:`HTTPSHandler` will also be added. can be imported), :class:`HTTPSHandler` will also be added.
...@@ -346,6 +346,11 @@ The following classes are provided: ...@@ -346,6 +346,11 @@ The following classes are provided:
Open local files. Open local files.
.. class:: DataHandler()
Open data URLs.
.. versionadded:: 3.4
.. class:: FTPHandler() .. class:: FTPHandler()
...@@ -972,6 +977,21 @@ FileHandler Objects ...@@ -972,6 +977,21 @@ FileHandler Objects
hostname is given, an :exc:`URLError` is raised. hostname is given, an :exc:`URLError` is raised.
.. _data-handler-objects:
DataHandler Objects
-------------------
.. method:: DataHandler.data_open(req)
Read a data URL. This kind of URL contains the content encoded in the URL
itself. The data URL syntax is specified in :rfc:`2397`. This implementation
ignores whitespace in base64-encoded data URLs, so the URL may be wrapped
in whatever source file it comes from. Although some browsers tolerate
missing padding at the end of a base64-encoded data URL, this
implementation will raise a :exc:`ValueError` in that case.
.. _ftp-handler-objects: .. _ftp-handler-objects:
FTPHandler Objects FTPHandler Objects
...@@ -1374,7 +1394,9 @@ some point in the future. ...@@ -1374,7 +1394,9 @@ some point in the future.
pair: FTP; protocol pair: FTP; protocol
* Currently, only the following protocols are supported: HTTP (versions 0.9 and * Currently, only the following protocols are supported: HTTP (versions 0.9 and
1.0), FTP, and local files. 1.0), FTP, local files, and data URLs.
.. versionchanged:: 3.4 Added support for data URLs.
* The caching feature of :func:`urlretrieve` has been disabled until someone * The caching feature of :func:`urlretrieve` has been disabled until someone
finds the time to hack proper processing of Expiration time headers. finds the time to hack proper processing of Expiration time headers.
......
...@@ -337,6 +337,79 @@ Content-Type: text/html; charset=iso-8859-1 ...@@ -337,6 +337,79 @@ Content-Type: text/html; charset=iso-8859-1
with support.check_warnings(('',DeprecationWarning)): with support.check_warnings(('',DeprecationWarning)):
urllib.request.URLopener() urllib.request.URLopener()
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def _open(self, url):
        """Open *url* with urlopen() and close the response at teardown.

        Registering the close with addCleanup() guarantees the response is
        released even when setUp() or the test itself fails part-way.
        """
        response = urllib.request.urlopen(url)
        self.addCleanup(response.close)
        return response

    def setUp(self):
        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        self.text_url_resp = self._open(self.text_url)
        self.text_url_base64_resp = self._open(self.text_url_base64)
        self.image_url_resp = self._open(self.image_url)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
                         str(len(self.image)))
        # An empty media type must fall back to the RFC 2397 default.
        self.assertEqual(self._open("data:,").info().get_params(),
                         [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
                         self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        # Decode the payload with the charset announced in the headers.
        charset = dict(self.text_url_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_resp.read().decode(charset), self.text)

    def test_read_text_base64(self):
        charset = dict(
            self.text_url_base64_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_base64_resp.read().decode(charset),
                         self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        self.assertRaises(ValueError, urllib.request.urlopen, 'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:;base64,Cg=')
class urlretrieve_FileTests(unittest.TestCase): class urlretrieve_FileTests(unittest.TestCase):
"""Test urllib.urlretrieve() on local files""" """Test urllib.urlretrieve() on local files"""
...@@ -1313,6 +1386,7 @@ def test_main(): ...@@ -1313,6 +1386,7 @@ def test_main():
support.run_unittest( support.run_unittest(
urlopen_FileTests, urlopen_FileTests,
urlopen_HttpTests, urlopen_HttpTests,
urlopen_DataTests,
urlretrieve_FileTests, urlretrieve_FileTests,
urlretrieve_HttpTests, urlretrieve_HttpTests,
ProxyTests, ProxyTests,
......
...@@ -103,7 +103,8 @@ from urllib.error import URLError, HTTPError, ContentTooShortError ...@@ -103,7 +103,8 @@ from urllib.error import URLError, HTTPError, ContentTooShortError
from urllib.parse import ( from urllib.parse import (
urlparse, urlsplit, urljoin, unwrap, quote, unquote, urlparse, urlsplit, urljoin, unwrap, quote, unquote,
splittype, splithost, splitport, splituser, splitpasswd, splittype, splithost, splitport, splituser, splitpasswd,
splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse) splitattr, splitquery, splitvalue, splittag, to_bytes,
unquote_to_bytes, urlunparse)
from urllib.response import addinfourl, addclosehook from urllib.response import addinfourl, addclosehook
# check for SSL # check for SSL
...@@ -121,7 +122,7 @@ __all__ = [ ...@@ -121,7 +122,7 @@ __all__ = [
'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm', 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', 'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler',
'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', 'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler',
'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler',
'UnknownHandler', 'HTTPErrorProcessor', 'UnknownHandler', 'HTTPErrorProcessor',
# Functions # Functions
'urlopen', 'install_opener', 'build_opener', 'urlopen', 'install_opener', 'build_opener',
...@@ -535,7 +536,8 @@ def build_opener(*handlers): ...@@ -535,7 +536,8 @@ def build_opener(*handlers):
opener = OpenerDirector() opener = OpenerDirector()
default_classes = [ProxyHandler, UnknownHandler, HTTPHandler, default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
HTTPDefaultErrorHandler, HTTPRedirectHandler, HTTPDefaultErrorHandler, HTTPRedirectHandler,
FTPHandler, FileHandler, HTTPErrorProcessor] FTPHandler, FileHandler, HTTPErrorProcessor,
DataHandler]
if hasattr(http.client, "HTTPSConnection"): if hasattr(http.client, "HTTPSConnection"):
default_classes.append(HTTPSHandler) default_classes.append(HTTPSHandler)
skip = set() skip = set()
...@@ -1541,6 +1543,36 @@ class CacheFTPHandler(FTPHandler): ...@@ -1541,6 +1543,36 @@ class CacheFTPHandler(FTPHandler):
self.cache.clear() self.cache.clear()
self.timeout.clear() self.timeout.clear()
class DataHandler(BaseHandler):
    """Handler that opens ``data:`` URLs as specified in RFC 2397."""

    def data_open(self, req):
        """Decode the content embedded in a ``data:`` URL.

        Any POSTed data attached to *req* is ignored.

        Syntax (RFC 2397):
            dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            mediatype := [ type "/" subtype ] *( ";" parameter )
            data      := *urlchar
            parameter := attribute "=" value

        Returns an addinfourl object wrapping the decoded bytes, with
        Content-type and Content-length headers derived from the URL.

        Raises ValueError if the URL has no "," separator or if the base64
        payload is malformed (e.g. missing padding).
        """
        url = req.full_url

        # Only the part after the scheme is needed.
        _, remainder = url.split(":", 1)
        mediatype, data = remainder.split(",", 1)

        # even base64 encoded data URLs might be quoted so unquote in any case:
        data = unquote_to_bytes(data)
        if mediatype.endswith(";base64"):
            data = base64.decodebytes(data)
            mediatype = mediatype[:-7]

        if not mediatype:
            mediatype = "text/plain;charset=US-ASCII"
        elif mediatype.startswith(";"):
            # RFC 2397 shorthand: "text/plain" may be omitted while parameters
            # (typically charset) are still supplied.
            mediatype = "text/plain" + mediatype

        headers = email.message_from_string(
            "Content-type: %s\nContent-length: %d\n" % (mediatype, len(data)))

        return addinfourl(io.BytesIO(data), headers, url)
# Code move from the old urllib module # Code move from the old urllib module
......
...@@ -884,6 +884,7 @@ Mike Pall ...@@ -884,6 +884,7 @@ Mike Pall
Todd R. Palmer Todd R. Palmer
Juan David Ibáñez Palomar Juan David Ibáñez Palomar
Jan Palus Jan Palus
Mathias Panzenböck
M. Papillon M. Papillon
Peter Parente Peter Parente
Alexandre Parenteau Alexandre Parenteau
......
...@@ -138,6 +138,9 @@ Core and Builtins ...@@ -138,6 +138,9 @@ Core and Builtins
Library Library
------- -------
- Issue #16423: urllib.request now has support for ``data:`` URLs. Patch by
Mathias Panzenböck.
- Issue #4473: Add a POP3.stls() to switch a clear-text POP3 session into - Issue #4473: Add a POP3.stls() to switch a clear-text POP3 session into
an encrypted POP3 session, on supported servers. Patch by Lorenzo Catucci. an encrypted POP3 session, on supported servers. Patch by Lorenzo Catucci.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment