test_hash.py 11.4 KB
Newer Older
1 2 3
# test the invariant that
#   iff a==b then hash(a)==hash(b)
#
4
# Also test that hash implementations are inherited as expected
5

6 7
import datetime
import os
8
import sys
9
import unittest
10
from test.support.script_helper import assert_python_ok
11
from collections import Hashable
12

13
IS_64BIT = sys.maxsize > 2**32
14

15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
def lcg(x, length=16):
    """Linear congruential generator"""
    if x == 0:
        return bytes(length)
    out = bytearray(length)
    for i in range(length):
        x = (214013 * x + 2531011) & 0x7fffffff
        out[i] = (x >> 16) & 0xff
    return bytes(out)

def pysiphash(uint64):
    """Convert SipHash24 output to Py_hash_t
    """
    assert 0 <= uint64 < (1 << 64)
    # simple unsigned to signed int64
    if uint64 > (1 << 63) - 1:
        int64 = uint64 - (1 << 64)
    else:
        int64 = uint64
    # mangle uint64 to uint32
    uint32 = (uint64 ^ uint64 >> 32) & 0xffffffff
    # simple unsigned to signed int32
    if uint32 > (1 << 31) - 1:
        int32 = uint32 - (1 << 32)
    else:
        int32 = uint32
    return int32, int64

def skip_unless_internalhash(test):
    """Skip decorator for tests that depend on SipHash24 or FNV"""
    ok = sys.hash_info.algorithm in {"fnv", "siphash24"}
    msg = "Requires SipHash24 or FNV"
    return test if ok else unittest.skip(msg)(test)

49

50
class HashEqualityTestCase(unittest.TestCase):
51

52
    def same_hash(self, *objlist):
Fred Drake's avatar
Fred Drake committed
53 54
        # Hash each object given and fail if
        # the hash values are not all the same.
55
        hashed = list(map(hash, objlist))
56 57
        for h in hashed[1:]:
            if h != hashed[0]:
58
                self.fail("hashed values differ: %r" % (objlist,))
59

60
    def test_numeric_literals(self):
61
        self.same_hash(1, 1, 1.0, 1.0+0.0j)
62 63 64
        self.same_hash(0, 0.0, 0.0+0.0j)
        self.same_hash(-1, -1.0, -1.0+0.0j)
        self.same_hash(-2, -2.0, -2.0+0.0j)
65

66
    def test_coerced_integers(self):
67
        self.same_hash(int(1), int(1), float(1), complex(1),
68
                       int('1'), float('1.0'))
69 70 71 72 73 74
        self.same_hash(int(-2**31), float(-2**31))
        self.same_hash(int(1-2**31), float(1-2**31))
        self.same_hash(int(2**31-1), float(2**31-1))
        # for 64-bit platforms
        self.same_hash(int(2**31), float(2**31))
        self.same_hash(int(-2**63), float(-2**63))
Guilherme Polo's avatar
Guilherme Polo committed
75
        self.same_hash(int(2**63), float(2**63))
76

77
    def test_coerced_floats(self):
78
        self.same_hash(int(1.23e300), float(1.23e300))
79
        self.same_hash(float(0.5), complex(0.5, 0.0))
80

81 82 83 84 85 86 87 88 89 90
    def test_unaligned_buffers(self):
        # The hash function for bytes-like objects shouldn't have
        # alignment-dependent results (example in issue #16427).
        b = b"123456789abcdefghijklmnopqrstuvwxyz" * 128
        for i in range(16):
            for j in range(16):
                aligned = b[i:128+j]
                unaligned = memoryview(b)[i:128+j]
                self.assertEqual(hash(aligned), hash(unaligned))

91

92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
_default_hash = object.__hash__
class DefaultHash(object): pass

_FIXED_HASH_VALUE = 42
class FixedHash(object):
    def __hash__(self):
        return _FIXED_HASH_VALUE

class OnlyEquality(object):
    def __eq__(self, other):
        return self is other

class OnlyInequality(object):
    def __ne__(self, other):
        return self is not other

class InheritedHashWithEquality(FixedHash, OnlyEquality): pass
class InheritedHashWithInequality(FixedHash, OnlyInequality): pass

class NoHash(object):
    __hash__ = None

class HashInheritanceTestCase(unittest.TestCase):
    default_expected = [object(),
                        DefaultHash(),
                        OnlyInequality(),
                       ]
    fixed_expected = [FixedHash(),
                      InheritedHashWithEquality(),
                      InheritedHashWithInequality(),
                      ]
    error_expected = [NoHash(),
                      OnlyEquality(),
                      ]

    def test_default_hash(self):
        for obj in self.default_expected:
            self.assertEqual(hash(obj), _default_hash(obj))

    def test_fixed_hash(self):
        for obj in self.fixed_expected:
            self.assertEqual(hash(obj), _FIXED_HASH_VALUE)

    def test_error_hash(self):
        for obj in self.error_expected:
            self.assertRaises(TypeError, hash, obj)

    def test_hashable(self):
        objects = (self.default_expected +
                   self.fixed_expected)
        for obj in objects:
143
            self.assertIsInstance(obj, Hashable)
144 145 146

    def test_not_hashable(self):
        for obj in self.error_expected:
147
            self.assertNotIsInstance(obj, Hashable)
148 149


150 151 152 153 154 155 156 157 158
# Issue #4701: Check that some builtin types are correctly hashable
class DefaultIterSeq(object):
    seq = range(10)
    def __len__(self):
        return len(self.seq)
    def __getitem__(self, index):
        return self.seq[index]

class HashBuiltinsTestCase(unittest.TestCase):
159
    hashes_to_check = [enumerate(range(10)),
160 161 162 163 164 165 166 167 168
                       iter(DefaultIterSeq()),
                       iter(lambda: 0, 0),
                      ]

    def test_hashes(self):
        _default_hash = object.__hash__
        for obj in self.hashes_to_check:
            self.assertEqual(hash(obj), _default_hash(obj))

169
class HashRandomizationTests:
170 171 172 173 174

    # Each subclass should define a field "repr_", containing the repr() of
    # an object to be tested

    def get_hash_command(self, repr_):
175
        return 'print(hash(eval(%a)))' % repr_
176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197

    def get_hash(self, repr_, seed=None):
        env = os.environ.copy()
        env['__cleanenv'] = True  # signal to assert_python not to do a copy
                                  # of os.environ on its own
        if seed is not None:
            env['PYTHONHASHSEED'] = str(seed)
        else:
            env.pop('PYTHONHASHSEED', None)
        out = assert_python_ok(
            '-c', self.get_hash_command(repr_),
            **env)
        stdout = out[1].strip()
        return int(stdout)

    def test_randomized_hash(self):
        # two runs should return different hashes
        run1 = self.get_hash(self.repr_, seed='random')
        run2 = self.get_hash(self.repr_, seed='random')
        self.assertNotEqual(run1, run2)

class StringlikeHashRandomizationTests(HashRandomizationTests):
198 199 200 201 202 203 204 205 206 207 208 209
    repr_ = None
    repr_long = None

    # 32bit little, 64bit little, 32bit big, 64bit big
    known_hashes = {
        'djba33x': [ # only used for small strings
            # seed 0, 'abc'
            [193485960, 193485960,  193485960, 193485960],
            # seed 42, 'abc'
            [-678966196, 573763426263223372, -820489388, -4282905804826039665],
            ],
        'siphash24': [
210
            # NOTE: PyUCS2 layout depends on endianess
211
            # seed 0, 'abc'
212
            [1198583518, 4596069200710135518, 1198583518, 4596069200710135518],
213
            # seed 42, 'abc'
214
            [273876886, -4501618152524544106, 273876886, -4501618152524544106],
215
            # seed 42, 'abcdefghijk'
216 217 218
            [-1745215313, 4436719588892876975, -1745215313, 4436719588892876975],
            # seed 0, 'äú∑ℇ'
            [493570806, 5749986484189612790, -1006381564, -5915111450199468540],
219
            # seed 42, 'äú∑ℇ'
220
            [-1677110816, -2947981342227738144, -1860207793, -4296699217652516017],
221 222 223 224 225 226 227 228 229 230 231 232 233 234 235
        ],
        'fnv': [
            # seed 0, 'abc'
            [-1600925533, 1453079729188098211, -1600925533,
             1453079729188098211],
            # seed 42, 'abc'
            [-206076799, -4410911502303878509, -1024014457,
             -3570150969479994130],
            # seed 42, 'abcdefghijk'
            [811136751, -5046230049376118746, -77208053 ,
             -4779029615281019666],
            # seed 0, 'äú∑ℇ'
            [44402817, 8998297579845987431, -1956240331,
             -782697888614047887],
            # seed 42, 'äú∑ℇ'
236 237
            [-283066365, -4576729883824601543, -271871407,
             -3927695501187247084],
238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
        ]
    }

    def get_expected_hash(self, position, length):
        if length < sys.hash_info.cutoff:
            algorithm = "djba33x"
        else:
            algorithm = sys.hash_info.algorithm
        if sys.byteorder == 'little':
            platform = 1 if IS_64BIT else 0
        else:
            assert(sys.byteorder == 'big')
            platform = 3 if IS_64BIT else 2
        return self.known_hashes[algorithm][position][platform]

253 254
    def test_null_hash(self):
        # PYTHONHASHSEED=0 disables the randomized hash
255
        known_hash_of_obj = self.get_expected_hash(0, 3)
256

257 258
        # Randomization is enabled by default:
        self.assertNotEqual(self.get_hash(self.repr_), known_hash_of_obj)
259 260 261 262

        # It can also be disabled by setting the seed to 0:
        self.assertEqual(self.get_hash(self.repr_, seed=0), known_hash_of_obj)

263
    @skip_unless_internalhash
264 265 266
    def test_fixed_hash(self):
        # test a fixed seed for the randomized hash
        # Note that all types share the same values:
267
        h = self.get_expected_hash(1, 3)
268 269
        self.assertEqual(self.get_hash(self.repr_, seed=42), h)

270 271 272 273 274 275 276 277
    @skip_unless_internalhash
    def test_long_fixed_hash(self):
        if self.repr_long is None:
            return
        h = self.get_expected_hash(2, 11)
        self.assertEqual(self.get_hash(self.repr_long, seed=42), h)


278 279
class StrHashRandomizationTests(StringlikeHashRandomizationTests,
                                unittest.TestCase):
280
    repr_ = repr('abc')
281 282
    repr_long = repr('abcdefghijk')
    repr_ucs2 = repr('äú∑ℇ')
283

284
    @skip_unless_internalhash
285 286 287
    def test_empty_string(self):
        self.assertEqual(hash(""), 0)

288 289 290 291 292 293 294
    @skip_unless_internalhash
    def test_ucs2_string(self):
        h = self.get_expected_hash(3, 6)
        self.assertEqual(self.get_hash(self.repr_ucs2, seed=0), h)
        h = self.get_expected_hash(4, 6)
        self.assertEqual(self.get_hash(self.repr_ucs2, seed=42), h)

295 296
class BytesHashRandomizationTests(StringlikeHashRandomizationTests,
                                  unittest.TestCase):
297
    repr_ = repr(b'abc')
298
    repr_long = repr(b'abcdefghijk')
299

300
    @skip_unless_internalhash
301 302 303
    def test_empty_string(self):
        self.assertEqual(hash(b""), 0)

304 305
class MemoryviewHashRandomizationTests(StringlikeHashRandomizationTests,
                                       unittest.TestCase):
306
    repr_ = "memoryview(b'abc')"
307
    repr_long = "memoryview(b'abcdefghijk')"
308

309
    @skip_unless_internalhash
310 311 312
    def test_empty_string(self):
        self.assertEqual(hash(memoryview(b"")), 0)

313 314 315 316
class DatetimeTests(HashRandomizationTests):
    def get_hash_command(self, repr_):
        return 'import datetime; print(hash(%s))' % repr_

317
class DatetimeDateTests(DatetimeTests, unittest.TestCase):
318 319
    repr_ = repr(datetime.date(1066, 10, 14))

320
class DatetimeDatetimeTests(DatetimeTests, unittest.TestCase):
321 322
    repr_ = repr(datetime.datetime(1, 2, 3, 4, 5, 6, 7))

323
class DatetimeTimeTests(DatetimeTests, unittest.TestCase):
324 325 326
    repr_ = repr(datetime.time(0))


327 328 329 330 331 332 333
class HashDistributionTestCase(unittest.TestCase):

    def test_hash_distribution(self):
        # check for hash collision
        base = "abcdefghabcdefg"
        for i in range(1, len(base)):
            prefix = base[:i]
334 335 336 337 338 339 340 341 342 343
            with self.subTest(prefix=prefix):
                s15 = set()
                s255 = set()
                for c in range(256):
                    h = hash(prefix + chr(c))
                    s15.add(h & 0xf)
                    s255.add(h & 0xff)
                # SipHash24 distribution depends on key, usually > 60%
                self.assertGreater(len(s15), 8, prefix)
                self.assertGreater(len(s255), 128, prefix)
344

345
if __name__ == "__main__":
346
    unittest.main()