test_encoding.py 5.6 KB
Newer Older
1 2 3
# -*- encoding: utf-8 -*-
from __future__ import unicode_literals

4
import datetime
5
import unittest
6

7
from django.utils import six
8
from django.utils.encoding import (
9 10
    escape_uri_path, filepath_to_uri, force_bytes, force_text, iri_to_uri,
    smart_text, uri_to_iri,
11
)
12
from django.utils.functional import SimpleLazyObject
13
from django.utils.http import urlquote_plus
14 15 16


class TestEncodingUtils(unittest.TestCase):
17 18 19 20 21 22 23 24 25 26 27 28 29 30
    def test_force_text_exception(self):
        """
        Check that broken __unicode__/__str__ actually raises an error.
        """
        class MyString(object):
            def __str__(self):
                return b'\xc3\xb6\xc3\xa4\xc3\xbc'

            __unicode__ = __str__

        # str(s) raises a TypeError on python 3 if the result is not a text type.
        # python 2 fails when it tries converting from str to unicode (via ASCII).
        exception = TypeError if six.PY3 else UnicodeError
        self.assertRaises(exception, force_text, MyString())
31

32 33 34 35
    def test_force_text_lazy(self):
        s = SimpleLazyObject(lambda: 'x')
        self.assertTrue(issubclass(type(force_text(s)), six.text_type))

36 37 38 39 40 41 42 43 44
    def test_force_bytes_exception(self):
        """
        Test that force_bytes knows how to convert to bytes an exception
        containing non-ASCII characters in its args.
        """
        error_msg = "This is an exception, voilà"
        exc = ValueError(error_msg)
        result = force_bytes(exc)
        self.assertEqual(result, error_msg.encode('utf-8'))
45

46 47 48 49
    def test_force_bytes_strings_only(self):
        today = datetime.date.today()
        self.assertEqual(force_bytes(today, strings_only=True), today)

50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
    def test_smart_text(self):
        class Test:
            if six.PY3:
                def __str__(self):
                    return 'ŠĐĆŽćžšđ'
            else:
                def __str__(self):
                    return 'ŠĐĆŽćžšđ'.encode('utf-8')

        class TestU:
            if six.PY3:
                def __str__(self):
                    return 'ŠĐĆŽćžšđ'

                def __bytes__(self):
                    return b'Foo'
            else:
                def __str__(self):
                    return b'Foo'

                def __unicode__(self):
                    return '\u0160\u0110\u0106\u017d\u0107\u017e\u0161\u0111'

        self.assertEqual(smart_text(Test()), '\u0160\u0110\u0106\u017d\u0107\u017e\u0161\u0111')
        self.assertEqual(smart_text(TestU()), '\u0160\u0110\u0106\u017d\u0107\u017e\u0161\u0111')
        self.assertEqual(smart_text(1), '1')
        self.assertEqual(smart_text('foo'), 'foo')
77

78 79 80

class TestRFC3987IEncodingUtils(unittest.TestCase):

81 82 83 84 85
    def test_filepath_to_uri(self):
        self.assertEqual(filepath_to_uri('upload\\чубака.mp4'),
            'upload/%D1%87%D1%83%D0%B1%D0%B0%D0%BA%D0%B0.mp4')
        self.assertEqual(filepath_to_uri('upload\\чубака.mp4'.encode('utf-8')),
            'upload/%D1%87%D1%83%D0%B1%D0%B0%D0%BA%D0%B0.mp4')
86

87
    def test_iri_to_uri(self):
88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
        cases = [
            # Valid UTF-8 sequences are encoded.
            ('red%09rosé#red', 'red%09ros%C3%A9#red'),
            ('/blog/for/Jürgen Münster/', '/blog/for/J%C3%BCrgen%20M%C3%BCnster/'),
            ('locations/%s' % urlquote_plus('Paris & Orléans'), 'locations/Paris+%26+Orl%C3%A9ans'),

            # Reserved chars remain unescaped.
            ('%&', '%&'),
            ('red&♥ros%#red', 'red&%E2%99%A5ros%#red'),
        ]

        for iri, uri in cases:
            self.assertEqual(iri_to_uri(iri), uri)

            # Test idempotency.
            self.assertEqual(iri_to_uri(iri_to_uri(iri)), uri)

    def test_uri_to_iri(self):
        cases = [
            # Valid UTF-8 sequences are decoded.
            ('/%E2%99%A5%E2%99%A5/', '/♥♥/'),
            ('/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93', '/♥♥/?utf8=✓'),

            # Broken UTF-8 sequences remain escaped.
            ('/%AAd%AAj%AAa%AAn%AAg%AAo%AA/', '/%AAd%AAj%AAa%AAn%AAg%AAo%AA/'),
            ('/%E2%99%A5%E2%E2%99%A5/', '/♥%E2♥/'),
            ('/%E2%99%A5%E2%99%E2%99%A5/', '/♥%E2%99♥/'),
            ('/%E2%E2%99%A5%E2%99%A5%99/', '/%E2♥♥%99/'),
            ('/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93', '/♥♥/?utf8=%9C%93%9C%93'),
        ]
118

119 120
        for uri, iri in cases:
            self.assertEqual(uri_to_iri(uri), iri)
121

122 123
            # Test idempotency.
            self.assertEqual(uri_to_iri(uri_to_iri(uri)), iri)
124

125 126 127 128 129 130 131 132 133 134 135 136 137
    def test_complementarity(self):
        cases = [
            ('/blog/for/J%C3%BCrgen%20M%C3%BCnster/', '/blog/for/J\xfcrgen M\xfcnster/'),
            ('%&', '%&'),
            ('red&%E2%99%A5ros%#red', 'red&♥ros%#red'),
            ('/%E2%99%A5%E2%99%A5/', '/♥♥/'),
            ('/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93', '/♥♥/?utf8=✓'),
            ('/%AAd%AAj%AAa%AAn%AAg%AAo%AA/', '/%AAd%AAj%AAa%AAn%AAg%AAo%AA/'),
            ('/%E2%99%A5%E2%E2%99%A5/', '/♥%E2♥/'),
            ('/%E2%99%A5%E2%99%E2%99%A5/', '/♥%E2%99♥/'),
            ('/%E2%E2%99%A5%E2%99%A5%99/', '/%E2♥♥%99/'),
            ('/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93', '/♥♥/?utf8=%9C%93%9C%93'),
        ]
138

139 140 141
        for uri, iri in cases:
            self.assertEqual(iri_to_uri(uri_to_iri(uri)), uri)
            self.assertEqual(uri_to_iri(iri_to_uri(iri)), iri)
142 143 144 145 146 147 148 149

    def test_escape_uri_path(self):
        self.assertEqual(
            escape_uri_path('/;some/=awful/?path/:with/@lots/&of/+awful/chars'),
            '/%3Bsome/%3Dawful/%3Fpath/:with/@lots/&of/+awful/chars'
        )
        self.assertEqual(escape_uri_path('/foo#bar'), '/foo%23bar')
        self.assertEqual(escape_uri_path('/foo?bar'), '/foo%3Fbar')