""" Test script for the unicodedata module.

    Written by Marc-Andre Lemburg (mal@lemburg.com).

    (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
import unittest, test.support
import hashlib

# Codec used to turn the joined per-character data into bytes for hashing.
encoding = 'utf-8'

### Run tests

class UnicodeMethodsTest(unittest.TestCase):
    """Checksum test over the str predicate and case-mapping methods.

    The results of the str methods for every code point below 0x10000 are
    fed into a SHA-1 hash, and the digest is compared with a stored value.
    """

    # update this, if the database changes
    expectedchecksum = 'c198ed264497f108434b3f576d4107237221cc8a'

    def test_method_checksum(self):
        # Hash the method results for U+0000..U+FFFF and compare the digest
        # with expectedchecksum.
        h = hashlib.sha1()
        for i in range(65536):
            char = chr(i)
            data = [
                # Predicates (single char)
                "01"[char.isalnum()],
                "01"[char.isalpha()],
                "01"[char.isdecimal()],
                "01"[char.isdigit()],
                "01"[char.islower()],
                "01"[char.isnumeric()],
                "01"[char.isspace()],
                "01"[char.istitle()],
                "01"[char.isupper()],

                # Predicates (multiple chars)
                "01"[(char + 'abc').isalnum()],
                "01"[(char + 'abc').isalpha()],
                "01"[(char + '123').isdecimal()],
                "01"[(char + '123').isdigit()],
                "01"[(char + 'abc').islower()],
                "01"[(char + '123').isnumeric()],
                "01"[(char + ' \t').isspace()],
                "01"[(char + 'abc').istitle()],
                "01"[(char + 'ABC').isupper()],

                # Mappings (single char)
                char.lower(),
                char.upper(),
                char.title(),

                # Mappings (multiple chars)
                (char + 'abc').lower(),
                (char + 'ABC').upper(),
                (char + 'abc').title(),
                (char + 'ABC').title(),

                ]
            # The range includes the lone surrogates U+D800..U+DFFF, which
            # the strict utf-8 codec refuses to encode; 'surrogatepass'
            # lets them through so the whole range can be hashed.
            h.update(''.join(data).encode(encoding, 'surrogatepass'))
        result = h.hexdigest()
        self.assertEqual(result, self.expectedchecksum)

class UnicodeDatabaseTest(unittest.TestCase):
    """Base fixture that exposes the unicodedata module as ``self.db``."""

    def setUp(self):
        # The import happens per test on purpose: when unicodedata is
        # missing this raises ImportError here, while test cases that do
        # not build on this fixture are still run.
        import unicodedata as database
        self.db = database

    def tearDown(self):
        # Drop the module reference that setUp() stored on the instance.
        del self.db

class UnicodeFunctionsTest(UnicodeDatabaseTest):
    """Tests for the per-character lookup functions of the database module.

    The module under test is reached through ``self.db``.
    """

    # update this, if the database changes
    expectedchecksum = '4e389f97e9f88b8b7ab743121fd643089116f9f2'

    def test_function_checksum(self):
        # Hash the database properties of every code point below 0x10000
        # and compare the digest with expectedchecksum.
        h = hashlib.sha1()

        for i in range(0x10000):
            char = chr(i)
            data = [
                # Properties
                str(self.db.digit(char, -1)),
                str(self.db.numeric(char, -1)),
                str(self.db.decimal(char, -1)),
                self.db.category(char),
                self.db.bidirectional(char),
                self.db.decomposition(char),
                str(self.db.mirrored(char)),
                str(self.db.combining(char)),
            ]
            h.update(''.join(data).encode("ascii"))
        result = h.hexdigest()
        self.assertEqual(result, self.expectedchecksum)

    def test_digit(self):
        # Known values, the explicit default for characters without a digit
        # value, and the errors raised for bad arguments.
        self.assertEqual(self.db.digit('A', None), None)
        self.assertEqual(self.db.digit('9'), 9)
        self.assertEqual(self.db.digit('\u215b', None), None)
        self.assertEqual(self.db.digit('\u2468'), 9)
        self.assertEqual(self.db.digit('\U00020000', None), None)

        self.assertRaises(TypeError, self.db.digit)
        self.assertRaises(TypeError, self.db.digit, 'xx')
        self.assertRaises(ValueError, self.db.digit, 'x')

    def test_numeric(self):
        # Same checks as test_digit, for numeric(); fractions such as
        # U+215B are expected to yield a float.
        self.assertEqual(self.db.numeric('A',None), None)
        self.assertEqual(self.db.numeric('9'), 9)
        self.assertEqual(self.db.numeric('\u215b'), 0.125)
        self.assertEqual(self.db.numeric('\u2468'), 9.0)
        self.assertEqual(self.db.numeric('\U00020000', None), None)

        self.assertRaises(TypeError, self.db.numeric)
        self.assertRaises(TypeError, self.db.numeric, 'xx')
        self.assertRaises(ValueError, self.db.numeric, 'x')

    def test_decimal(self):
        # Same checks as test_digit, for decimal().
        self.assertEqual(self.db.decimal('A',None), None)
        self.assertEqual(self.db.decimal('9'), 9)
        self.assertEqual(self.db.decimal('\u215b', None), None)
        self.assertEqual(self.db.decimal('\u2468', None), None)
        self.assertEqual(self.db.decimal('\U00020000', None), None)

        self.assertRaises(TypeError, self.db.decimal)
        self.assertRaises(TypeError, self.db.decimal, 'xx')
        self.assertRaises(ValueError, self.db.decimal, 'x')

    def test_category(self):
        # Expected general categories for a few characters, plus argument
        # errors.
        self.assertEqual(self.db.category('\uFFFE'), 'Cn')
        self.assertEqual(self.db.category('a'), 'Ll')
        self.assertEqual(self.db.category('A'), 'Lu')
        self.assertEqual(self.db.category('\U00020000'), 'Lo')

        self.assertRaises(TypeError, self.db.category)
        self.assertRaises(TypeError, self.db.category, 'xx')

    def test_bidirectional(self):
        # Expected bidirectional classes for a few characters, plus
        # argument errors.
        self.assertEqual(self.db.bidirectional('\uFFFE'), '')
        self.assertEqual(self.db.bidirectional(' '), 'WS')
        self.assertEqual(self.db.bidirectional('A'), 'L')
        self.assertEqual(self.db.bidirectional('\U00020000'), 'L')

        self.assertRaises(TypeError, self.db.bidirectional)
        self.assertRaises(TypeError, self.db.bidirectional, 'xx')

    def test_decomposition(self):
        # Expected decomposition strings, plus argument errors.
        self.assertEqual(self.db.decomposition('\uFFFE'),'')
        self.assertEqual(self.db.decomposition('\u00bc'), '<fraction> 0031 2044 0034')

        self.assertRaises(TypeError, self.db.decomposition)
        self.assertRaises(TypeError, self.db.decomposition, 'xx')

    def test_mirrored(self):
        # Expected mirrored flags, plus argument errors.
        self.assertEqual(self.db.mirrored('\uFFFE'), 0)
        self.assertEqual(self.db.mirrored('a'), 0)
        self.assertEqual(self.db.mirrored('\u2201'), 1)
        self.assertEqual(self.db.mirrored('\U00020000'), 0)

        self.assertRaises(TypeError, self.db.mirrored)
        self.assertRaises(TypeError, self.db.mirrored, 'xx')

    def test_combining(self):
        # Expected combining classes, plus argument errors.
        self.assertEqual(self.db.combining('\uFFFE'), 0)
        self.assertEqual(self.db.combining('a'), 0)
        self.assertEqual(self.db.combining('\u20e1'), 230)
        self.assertEqual(self.db.combining('\U00020000'), 0)

        self.assertRaises(TypeError, self.db.combining)
        self.assertRaises(TypeError, self.db.combining, 'xx')

    def test_normalize(self):
        # Only argument handling and the empty string are checked here.
        self.assertRaises(TypeError, self.db.normalize)
        self.assertRaises(ValueError, self.db.normalize, 'unknown', 'xx')
        self.assertEqual(self.db.normalize('NFKC', ''), '')
        # The rest can be found in test_normalization.py
        # which requires an external file.

    def test_east_asian_width(self):
        # Non-str and wrong-length arguments must be rejected; then one
        # sample character per expected width value.
        eaw = self.db.east_asian_width
        self.assertRaises(TypeError, eaw, b'a')
        self.assertRaises(TypeError, eaw, bytearray())
        self.assertRaises(TypeError, eaw, '')
        self.assertRaises(TypeError, eaw, 'ra')
        self.assertEqual(eaw('\x1e'), 'N')
        self.assertEqual(eaw('\x20'), 'Na')
        self.assertEqual(eaw('\uC894'), 'W')
        self.assertEqual(eaw('\uFF66'), 'H')
        self.assertEqual(eaw('\uFF1F'), 'F')
        self.assertEqual(eaw('\u2010'), 'A')
        self.assertEqual(eaw('\U00020000'), 'W')

class UnicodeMiscTest(UnicodeDatabaseTest):
    """Cross-checks between database functions and a name-lookup regression test.

    The module under test is reached through ``self.db``.
    """

    def _check_consistent_with_numeric(self, lookup):
        # For every code point below 0x10000 where lookup(c, -1) yields a
        # value other than the -1 default, assert that numeric(c) is equal
        # to it, and that at least ten such characters were seen.
        count = 0
        for i in range(0x10000):
            c = chr(i)
            value = lookup(c, -1)
            if value != -1:
                self.assertEqual(value, self.db.numeric(c))
                count += 1
        self.assertTrue(count >= 10) # should have tested at least the ASCII digits

    def test_decimal_numeric_consistent(self):
        # Test that decimal and numeric are consistent,
        # i.e. if a character has a decimal value,
        # its numeric value should be the same.
        self._check_consistent_with_numeric(self.db.decimal)

    def test_digit_numeric_consistent(self):
        # Test that digit and numeric are consistent,
        # i.e. if a character has a digit value,
        # its numeric value should be the same.
        self._check_consistent_with_numeric(self.db.digit)

    def test_bug_1704793(self):
        # lookup() must return the character for a name whose code point
        # lies above U+FFFF.
        self.assertEqual(self.db.lookup("GOTHIC LETTER FAIHU"), '\U00010346')

def test_main():
    """Run the three test case classes of this module via test.support."""
    test.support.run_unittest(
        UnicodeMiscTest,
        UnicodeMethodsTest,
        UnicodeFunctionsTest
    )

# Allow running this file directly as a script.
if __name__ == "__main__":
    test_main()