#.  Copyright (C) 2005-2010   Gregory P. Smith (greg@krypto.org)
#  Licensed to PSF under a Contributor Agreement.
#

__doc__ = """hashlib module - A common interface to many hash functions.

new(name, data=b'', **kwargs) - returns a new hash object implementing the
                                given hash function; initializing the hash
                                using the given binary data.

Named constructor functions are also available, these are faster
than using new(name):

md5(), sha1(), sha224(), sha256(), sha384(), sha512(), blake2b(), blake2s(),
sha3_224, sha3_256, sha3_384, sha3_512, shake_128, and shake_256.

More algorithms may be available on your platform but the above are guaranteed
to exist.  See the algorithms_guaranteed and algorithms_available attributes
to find out what algorithm names can be passed to new().

NOTE: If you want the adler32 or crc32 hash functions they are available in
the zlib module.

Choose your hash function wisely.  Some have known collision weaknesses.
sha384 and sha512 will be slow on 32 bit platforms.

Hash objects have these methods:
 - update(arg): Update the hash object with the bytes in arg. Repeated calls
                are equivalent to a single call with the concatenation of all
                the arguments.
 - digest():    Return the digest of the bytes passed to the update() method
                so far.
 - hexdigest(): Like digest() except the digest is returned as a unicode
                object of double length, containing only hexadecimal digits.
 - copy():      Return a copy (clone) of the hash object. This can be used to
                efficiently compute the digests of strings that share a common
                initial substring.

For example, to obtain the digest of the string 'Nobody inspects the
spammish repetition':

    >>> import hashlib
    >>> m = hashlib.md5()
    >>> m.update(b"Nobody inspects")
    >>> m.update(b" the spammish repetition")
    >>> m.digest()
    b'\\xbbd\\x9c\\x83\\xdd\\x1e\\xa5\\xc9\\xd9\\xde\\xc9\\xa1\\x8d\\xf0\\xff\\xe9'

More condensed:

    >>> hashlib.sha224(b"Nobody inspects the spammish repetition").hexdigest()
    'a4337bc45a8fc544c03f52dc550cd6e1e87021bc896588bd79e901e2'

"""

# This tuple and __get_builtin_constructor() must be modified if a new
# always available algorithm is added.
__always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512',
                      'blake2b', 'blake2s',
                      'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512',
                      'shake_128', 'shake_256')


# Both public sets start out as the always-supported names.  They are built
# as two separate set objects so that extending one never affects the other.
algorithms_guaranteed = set(__always_supported)
algorithms_available = set(__always_supported)

# Public API: one named constructor per always-supported algorithm, plus the
# generic entry points and the two algorithm-name sets.
__all__ = __always_supported + ('new', 'algorithms_guaranteed',
                                'algorithms_available', 'pbkdf2_hmac')


# Maps algorithm name -> builtin constructor.  Filled lazily by
# __get_builtin_constructor() so an extension module is only imported the
# first time one of its algorithms is requested.
__builtin_constructor_cache = {}


def __get_builtin_constructor(name):
    """Return the builtin (non-OpenSSL) constructor for the hash *name*.

    The matching extension module is imported on first use and every
    constructor it provides is stored in the module-level cache, so later
    lookups for the same family are plain dictionary hits.  The MD5 and
    SHA-1/SHA-2 families are accepted in lower and upper case; the blake2
    and SHA-3 names only in lower case.

    Raises ValueError if *name* is unknown or its extension module cannot
    be imported.
    """
    cache = __builtin_constructor_cache
    constructor = cache.get(name)
    if constructor is not None:
        return constructor
    try:
        if name in ('SHA1', 'sha1'):
            import _sha1
            cache['SHA1'] = cache['sha1'] = _sha1.sha1
        elif name in ('MD5', 'md5'):
            import _md5
            cache['MD5'] = cache['md5'] = _md5.md5
        elif name in ('SHA256', 'sha256', 'SHA224', 'sha224'):
            import _sha256
            cache['SHA224'] = cache['sha224'] = _sha256.sha224
            cache['SHA256'] = cache['sha256'] = _sha256.sha256
        elif name in ('SHA512', 'sha512', 'SHA384', 'sha384'):
            import _sha512
            cache['SHA384'] = cache['sha384'] = _sha512.sha384
            cache['SHA512'] = cache['sha512'] = _sha512.sha512
        elif name in ('blake2b', 'blake2s'):
            import _blake2
            cache['blake2b'] = _blake2.blake2b
            cache['blake2s'] = _blake2.blake2s
        elif name in {'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512',
                      'shake_128', 'shake_256'}:
            import _sha3
            cache['sha3_224'] = _sha3.sha3_224
            cache['sha3_256'] = _sha3.sha3_256
            cache['sha3_384'] = _sha3.sha3_384
            cache['sha3_512'] = _sha3.sha3_512
            cache['shake_128'] = _sha3.shake_128
            cache['shake_256'] = _sha3.shake_256
    except ImportError:
        pass  # no extension module, this hash is unsupported.

    # Second lookup: succeeds only if one of the branches above populated
    # the cache for this name.
    constructor = cache.get(name)
    if constructor is not None:
        return constructor

    raise ValueError('unsupported hash type ' + name)


def __get_openssl_constructor(name):
    """Return a constructor for the hash *name*, preferring OpenSSL.

    blake2b/blake2s always use the builtin implementation.  For every
    other name the ``openssl_<name>`` function of _hashlib is returned when
    it exists and can actually be called; otherwise the builtin constructor
    is used (which raises ValueError for unsupported names).
    """
    if name in {'blake2b', 'blake2s'}:
        # Prefer our blake2 implementation.
        return __get_builtin_constructor(name)
    try:
        f = getattr(_hashlib, 'openssl_' + name)
        # Allow the C module to raise ValueError.  The function will be
        # defined but the hash not actually available thanks to OpenSSL.
        f()
        # Use the C function directly (very fast)
        return f
    except (AttributeError, ValueError):
        return __get_builtin_constructor(name)


def __py_new(name, data=b'', **kwargs):
    """new(name, data=b'', **kwargs) - Return a new hashing object using the
    named algorithm; optionally initialized with data (which must be bytes).
    """
    # Pure builtin path: look up the constructor and forward every argument.
    return __get_builtin_constructor(name)(data, **kwargs)


def __hash_new(name, data=b'', **kwargs):
    """new(name, data=b'', **kwargs) - Return a new hashing object using the
    named algorithm; optionally initialized with data (which must be bytes).
    """
    if name in {'blake2b', 'blake2s'}:
        # Prefer our blake2 implementation.
        # OpenSSL 1.1.0 comes with a limited implementation of blake2b/s.
        # It does neither support keyed blake2 nor advanced features like
        # salt, personal, tree hashing or SSE.
        return __get_builtin_constructor(name)(data, **kwargs)
    # NOTE(review): **kwargs is only forwarded on the blake2 path above; the
    # two calls below pass just (name, data), so extra keyword arguments are
    # silently ignored for every other algorithm -- confirm this is intended.
    try:
        return _hashlib.new(name, data)
    except ValueError:
        # If the _hashlib module (OpenSSL) doesn't support the named
        # hash, try using our builtin implementations.
        # This allows for SHA224/256 and SHA384/512 support even though
        # the OpenSSL library prior to 0.9.8 doesn't provide them.
        return __get_builtin_constructor(name)(data)


# Select the implementation behind the public new() and behind the named
# constructors: OpenSSL-backed when _hashlib can be imported, otherwise the
# builtin extension modules only.
try:
    import _hashlib
    new = __hash_new
    __get_hash = __get_openssl_constructor
    # Advertise the extra algorithm names that OpenSSL provides.
    algorithms_available = algorithms_available.union(
            _hashlib.openssl_md_meth_names)
except ImportError:
    new = __py_new
    __get_hash = __get_builtin_constructor

try:
    # OpenSSL's PKCS5_PBKDF2_HMAC requires OpenSSL 1.0+ with HMAC and SHA
    from _hashlib import pbkdf2_hmac
except ImportError:
    # Translation tables that XOR every byte value with 0x5C / 0x36; they
    # are applied to the padded key for the outer / inner hash below.
    _trans_5C = bytes((x ^ 0x5C) for x in range(256))
    _trans_36 = bytes((x ^ 0x36) for x in range(256))

    def pbkdf2_hmac(hash_name, password, salt, iterations, dklen=None):
        """Password based key derivation function 2 (PKCS #5 v2.0)

        This Python implementation, based on the hmac module, is about as
        fast as OpenSSL's PKCS5_PBKDF2_HMAC for short passwords and much
        faster for long passwords.

        hash_name must be a str naming a hash accepted by new(); password
        and salt must be bytes-like.  dklen defaults to the digest size of
        the hash.  Raises TypeError if hash_name is not a str and
        ValueError if iterations or dklen is smaller than 1.
        """
        if not isinstance(hash_name, str):
            raise TypeError(hash_name)

        # Normalize arbitrary buffer objects to bytes.
        if not isinstance(password, (bytes, bytearray)):
            password = bytes(memoryview(password))
        if not isinstance(salt, (bytes, bytearray)):
            salt = bytes(memoryview(salt))

        # Fast inline HMAC implementation
        inner = new(hash_name)
        outer = new(hash_name)
        blocksize = getattr(inner, 'block_size', 64)
        if len(password) > blocksize:
            # Keys longer than one block are replaced by their digest.
            password = new(hash_name, password).digest()
        password = password + b'\x00' * (blocksize - len(password))
        inner.update(password.translate(_trans_36))
        outer.update(password.translate(_trans_5C))

        def prf(msg, inner=inner, outer=outer):
            # PBKDF2_HMAC uses the password as key. We can re-use the same
            # digest objects and just update copies to skip initialization.
            icpy = inner.copy()
            ocpy = outer.copy()
            icpy.update(msg)
            ocpy.update(icpy.digest())
            return ocpy.digest()

        if iterations < 1:
            raise ValueError(iterations)
        if dklen is None:
            dklen = outer.digest_size
        if dklen < 1:
            raise ValueError(dklen)

        dkey = b''
        loop = 1  # 1-based block counter appended to the salt
        from_bytes = int.from_bytes
        while len(dkey) < dklen:
            prev = prf(salt + loop.to_bytes(4, 'big'))
            # endianness doesn't matter here as long as to / from use the
            # same byte order
            rkey = from_bytes(prev, 'big')
            for i in range(iterations - 1):
                prev = prf(prev)
                # rkey = rkey ^ prev
                rkey ^= from_bytes(prev, 'big')
            loop += 1
            dkey += rkey.to_bytes(inner.digest_size, 'big')

        # The last block may overshoot; trim to the requested length.
        return dkey[:dklen]

try:
    # OpenSSL's scrypt requires OpenSSL 1.1+
    from _hashlib import scrypt
except ImportError:
    # scrypt is optional: when _hashlib (or its scrypt) is unavailable the
    # name is simply left undefined in this module.
    pass


# Bind a module-level named constructor (md5(), sha1(), ...) for every
# always-supported algorithm.
for __func_name in __always_supported:
    # try them all, some may not work due to the OpenSSL
    # version not supporting that algorithm.
    try:
        globals()[__func_name] = __get_hash(__func_name)
    except ValueError:
        # Imported lazily so logging is only loaded when a constructor is
        # actually missing.
        import logging
        logging.exception('code for hash %s was not found.', __func_name)


# Drop the import-time helper names so they do not linger in the module
# namespace.
del (__always_supported, __func_name, __get_hash,
     __py_new, __hash_new, __get_openssl_constructor)