bootstrap_hash.c 17.9 KB
Newer Older
1 2
#include "Python.h"
#ifdef MS_WINDOWS
3
#  include <windows.h>
4 5 6
/* All sample MSDN wincrypt programs include the header below. It is at least
 * required with MinGW. */
#  include <wincrypt.h>
7
#else
8 9 10 11
#  include <fcntl.h>
#  ifdef HAVE_SYS_STAT_H
#    include <sys/stat.h>
#  endif
12 13 14
#  ifdef HAVE_LINUX_RANDOM_H
#    include <linux/random.h>
#  endif
15
#  if defined(HAVE_SYS_RANDOM_H) && (defined(HAVE_GETRANDOM) || defined(HAVE_GETENTROPY))
16
#    include <sys/random.h>
17 18
#  endif
#  if !defined(HAVE_GETRANDOM) && defined(HAVE_GETRANDOM_SYSCALL)
19 20
#    include <sys/syscall.h>
#  endif
21 22
#endif

23 24 25 26 27
/* Non-zero once _Py_HashRandomization_Init() has run; later calls to that
   function return immediately when the flag is set.
   With Py_DEBUG defined the flag has external linkage, otherwise it is
   private to this file. */
#ifdef Py_DEBUG
int _Py_HashSecret_Initialized = 0;
#else
static int _Py_HashSecret_Initialized = 0;
#endif
28 29 30 31 32 33 34 35

#ifdef MS_WINDOWS
/* CryptoAPI provider handle shared by the win32_urandom*() helpers.
   0 means no context has been acquired yet: win32_urandom() acquires one on
   demand and _Py_HashRandomization_Fini() releases it. */
static HCRYPTPROV hCryptProv = 0;

static int
win32_urandom_init(int raise)
{
    /* Acquire context */
36 37
    if (!CryptAcquireContext(&hCryptProv, NULL, NULL,
                             PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
38 39 40 41 42
        goto error;

    return 0;

error:
43
    if (raise) {
44
        PyErr_SetFromWindowsErr(0);
45
    }
46 47 48 49
    return -1;
}

/* Fill buffer with size pseudo-random bytes generated by the Windows CryptoGen
50
   API. Return 0 on success, or raise an exception and return -1 on error. */
51 52 53 54 55 56 57
static int
win32_urandom(unsigned char *buffer, Py_ssize_t size, int raise)
{
    Py_ssize_t chunk;

    if (hCryptProv == 0)
    {
58
        if (win32_urandom_init(raise) == -1) {
59
            return -1;
60
        }
61 62 63 64 65
    }

    while (size > 0)
    {
        chunk = size > INT_MAX ? INT_MAX : size;
66
        if (!CryptGenRandom(hCryptProv, (DWORD)chunk, buffer))
67 68
        {
            /* CryptGenRandom() failed */
69
            if (raise) {
70
                PyErr_SetFromWindowsErr(0);
71
            }
72 73 74 75 76 77 78 79
            return -1;
        }
        buffer += chunk;
        size -= chunk;
    }
    return 0;
}

80 81
#else /* !MS_WINDOWS */

82
#if defined(HAVE_GETRANDOM) || defined(HAVE_GETRANDOM_SYSCALL)
83
#define PY_GETRANDOM 1
84

85 86
/* Call getrandom() to get random bytes:

87
   - Return 1 on success
88 89 90
   - Return 0 if getrandom() is not available (failed with ENOSYS or EPERM),
     or if getrandom(GRND_NONBLOCK) failed with EAGAIN (system urandom not
     initialized yet) and raise=0.
91
   - Raise an exception (if raise is non-zero) and return -1 on error:
92 93 94 95 96
     if getrandom() failed with EINTR, raise is non-zero and the Python signal
     handler raised an exception, or if getrandom() failed with a different
     error.

   getrandom() is retried if it failed with EINTR: interrupted by a signal. */
97
static int
98
py_getrandom(void *buffer, Py_ssize_t size, int blocking, int raise)
99
{
100
    /* Is getrandom() supported by the running kernel? Set to 0 if getrandom()
101
       failed with ENOSYS or EPERM. Need Linux kernel 3.17 or newer, or Solaris
Victor Stinner's avatar
Victor Stinner committed
102
       11.3 or newer */
103
    static int getrandom_works = 1;
104
    int flags;
105
    char *dest;
106
    long n;
107

108
    if (!getrandom_works) {
109
        return 0;
110
    }
111

112
    flags = blocking ? 0 : GRND_NONBLOCK;
113
    dest = buffer;
114
    while (0 < size) {
115 116
#ifdef sun
        /* Issue #26735: On Solaris, getrandom() is limited to returning up
117 118
           to 1024 bytes. Call it multiple times if more bytes are
           requested. */
119 120
        n = Py_MIN(size, 1024);
#else
121
        n = Py_MIN(size, LONG_MAX);
122
#endif
123

124
        errno = 0;
125 126 127
#ifdef HAVE_GETRANDOM
        if (raise) {
            Py_BEGIN_ALLOW_THREADS
128
            n = getrandom(dest, n, flags);
129 130 131
            Py_END_ALLOW_THREADS
        }
        else {
132
            n = getrandom(dest, n, flags);
133 134 135
        }
#else
        /* On Linux, use the syscall() function because the GNU libc doesn't
136 137
           expose the Linux getrandom() syscall yet. See:
           https://sourceware.org/bugzilla/show_bug.cgi?id=17252 */
138 139
        if (raise) {
            Py_BEGIN_ALLOW_THREADS
140
            n = syscall(SYS_getrandom, dest, n, flags);
141 142 143
            Py_END_ALLOW_THREADS
        }
        else {
144
            n = syscall(SYS_getrandom, dest, n, flags);
145
        }
146
#endif
147

148
        if (n < 0) {
149 150 151
            /* ENOSYS: the syscall is not supported by the kernel.
               EPERM: the syscall is blocked by a security policy (ex: SECCOMP)
               or something else. */
152
            if (errno == ENOSYS || errno == EPERM) {
153 154 155
                getrandom_works = 0;
                return 0;
            }
156 157

            /* getrandom(GRND_NONBLOCK) fails with EAGAIN if the system urandom
158
               is not initialiazed yet. For _PyRandom_Init(), we ignore the
159
               error and fall back on reading /dev/urandom which never blocks,
160 161
               even if the system urandom is not initialized yet:
               see the PEP 524. */
162
            if (errno == EAGAIN && !raise && !blocking) {
163 164
                return 0;
            }
165 166

            if (errno == EINTR) {
167 168 169 170
                if (raise) {
                    if (PyErr_CheckSignals()) {
                        return -1;
                    }
171
                }
172 173

                /* retry getrandom() if it was interrupted by a signal */
174 175 176
                continue;
            }

177
            if (raise) {
178
                PyErr_SetFromErrno(PyExc_OSError);
179
            }
180 181 182
            return -1;
        }

183
        dest += n;
184 185 186 187
        size -= n;
    }
    return 1;
}
188 189 190 191

#elif defined(HAVE_GETENTROPY)
#define PY_GETENTROPY 1

192
/* Fill buffer with size pseudo-random bytes generated by getentropy():
193

194 195 196 197 198 199 200 201 202
   - Return 1 on success
   - Return 0 if getentropy() syscall is not available (failed with ENOSYS or
     EPERM).
   - Raise an exception (if raise is non-zero) and return -1 on error:
     if getentropy() failed with EINTR, raise is non-zero and the Python signal
     handler raised an exception, or if getentropy() failed with a different
     error.

   getentropy() is retried if it failed with EINTR: interrupted by a signal. */
203 204 205
static int
py_getentropy(char *buffer, Py_ssize_t size, int raise)
{
206 207 208 209 210 211 212 213
    /* Is getentropy() supported by the running kernel? Set to 0 if
       getentropy() failed with ENOSYS or EPERM. */
    static int getentropy_works = 1;

    if (!getentropy_works) {
        return 0;
    }

214
    while (size > 0) {
215 216
        /* getentropy() is limited to returning up to 256 bytes. Call it
           multiple times if more bytes are requested. */
217 218 219 220 221 222 223 224 225 226 227 228 229
        Py_ssize_t len = Py_MIN(size, 256);
        int res;

        if (raise) {
            Py_BEGIN_ALLOW_THREADS
            res = getentropy(buffer, len);
            Py_END_ALLOW_THREADS
        }
        else {
            res = getentropy(buffer, len);
        }

        if (res < 0) {
230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248
            /* ENOSYS: the syscall is not supported by the running kernel.
               EPERM: the syscall is blocked by a security policy (ex: SECCOMP)
               or something else. */
            if (errno == ENOSYS || errno == EPERM) {
                getentropy_works = 0;
                return 0;
            }

            if (errno == EINTR) {
                if (raise) {
                    if (PyErr_CheckSignals()) {
                        return -1;
                    }
                }

                /* retry getentropy() if it was interrupted by a signal */
                continue;
            }

249 250 251 252 253 254 255 256 257 258 259 260
            if (raise) {
                PyErr_SetFromErrno(PyExc_OSError);
            }
            return -1;
        }

        buffer += len;
        size -= len;
    }
    return 1;
}
#endif /* HAVE_GETRANDOM || HAVE_GETRANDOM_SYSCALL, else HAVE_GETENTROPY */
261

262

263 264 265 266 267
/* Cached /dev/urandom descriptor used by dev_urandom() when raise is
   non-zero, together with the identity of the file it referred to when it
   was cached. Only fd is explicitly initialized (to -1). */
static struct {
    /* cached file descriptor, or -1 when none is cached */
    int fd;
    /* st_dev reported by fstat() at the time fd was cached */
    dev_t st_dev;
    /* st_ino reported by fstat() at the time fd was cached */
    ino_t st_ino;
} urandom_cache = { -1 };
268

269
/* Read random bytes from the /dev/urandom device:
270

271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295
   - Return 0 on success
   - Raise an exception (if raise is non-zero) and return -1 on error

   Possible causes of errors:

   - open() failed with ENOENT, ENXIO, ENODEV, EACCES: the /dev/urandom device
     was not found. For example, it was removed manually or not exposed in a
     chroot or container.
   - open() failed with a different error
   - fstat() failed
   - read() failed or returned 0

   read() is retried if it failed with EINTR: interrupted by a signal.

   The file descriptor of the device is kept open between calls to avoid using
   many file descriptors when run in parallel from multiple threads:
   see the issue #18756.

   st_dev and st_ino fields of the file descriptor (from fstat()) are cached to
   check if the file descriptor was replaced by a different file (which is
   likely a bug in the application): see the issue #21207.

   If the file descriptor was closed or replaced, open a new file descriptor
   but don't close the old file descriptor: it probably points to something
   important for some third-party code. */
296
static int
297
dev_urandom(char *buffer, Py_ssize_t size, int raise)
298 299 300
{
    int fd;
    Py_ssize_t n;
301 302 303

    if (raise) {
        struct _Py_stat_struct st;
304
        int fstat_result;
305

306
        if (urandom_cache.fd >= 0) {
307 308 309 310
            Py_BEGIN_ALLOW_THREADS
            fstat_result = _Py_fstat_noraise(urandom_cache.fd, &st);
            Py_END_ALLOW_THREADS

311
            /* Does the fd point to the same thing as before? (issue #21207) */
312
            if (fstat_result
313 314 315 316 317 318 319
                || st.st_dev != urandom_cache.st_dev
                || st.st_ino != urandom_cache.st_ino) {
                /* Something changed: forget the cached fd (but don't close it,
                   since it probably points to something important for some
                   third-party code). */
                urandom_cache.fd = -1;
            }
320
        }
321 322
        if (urandom_cache.fd >= 0)
            fd = urandom_cache.fd;
323
        else {
324 325 326
            fd = _Py_open("/dev/urandom", O_RDONLY);
            if (fd < 0) {
                if (errno == ENOENT || errno == ENXIO ||
327
                    errno == ENODEV || errno == EACCES) {
328 329
                    PyErr_SetString(PyExc_NotImplementedError,
                                    "/dev/urandom (or equivalent) not found");
330
                }
331
                /* otherwise, keep the OSError exception raised by _Py_open() */
332 333
                return -1;
            }
334 335 336 337 338 339
            if (urandom_cache.fd >= 0) {
                /* urandom_fd was initialized by another thread while we were
                   not holding the GIL, keep it. */
                close(fd);
                fd = urandom_cache.fd;
            }
340
            else {
341 342 343 344 345 346 347 348 349
                if (_Py_fstat(fd, &st)) {
                    close(fd);
                    return -1;
                }
                else {
                    urandom_cache.fd = fd;
                    urandom_cache.st_dev = st.st_dev;
                    urandom_cache.st_ino = st.st_ino;
                }
350
            }
351
        }
352

353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370
        do {
            n = _Py_read(fd, buffer, (size_t)size);
            if (n == -1)
                return -1;
            if (n == 0) {
                PyErr_Format(PyExc_RuntimeError,
                        "Failed to read %zi bytes from /dev/urandom",
                        size);
                return -1;
            }

            buffer += n;
            size -= n;
        } while (0 < size);
    }
    else {
        fd = _Py_open_noraise("/dev/urandom", O_RDONLY);
        if (fd < 0) {
371 372 373
            return -1;
        }

374 375 376 377 378
        while (0 < size)
        {
            do {
                n = read(fd, buffer, (size_t)size);
            } while (n < 0 && errno == EINTR);
379

380 381
            if (n <= 0) {
                /* stop on error or if read(size) returned 0 */
382
                close(fd);
383 384 385 386 387 388 389 390
                return -1;
            }

            buffer += n;
            size -= n;
        }
        close(fd);
    }
391 392
    return 0;
}
393 394 395 396

static void
dev_urandom_close(void)
{
397 398 399
    if (urandom_cache.fd >= 0) {
        close(urandom_cache.fd);
        urandom_cache.fd = -1;
400 401
    }
}
402
#endif /* !MS_WINDOWS */
403

404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425

/* Deterministically expand the seed x0 into size bytes with a linear
   congruential generator (LCG):

       state(n+1) = state(n) * 214013 + 2531011

   computed in unsigned int arithmetic, so it wraps modulo
   2 ^ (8 * sizeof(unsigned int)), i.e. 2^32 with a 32-bit unsigned int.

   Each output byte is bits 23..16 of the freshly advanced state.
   The same seed always yields the same bytes. */
static void
lcg_urandom(unsigned int x0, unsigned char *buffer, size_t size)
{
    unsigned int state = x0;
    unsigned char *out = buffer;
    size_t remaining;

    for (remaining = size; remaining > 0; remaining--) {
        /* unsigned overflow wraps: that is the modulo of the recurrence */
        state = state * 214013 + 2531011;
        *out++ = (unsigned char)((state >> 16) & 0xff);
    }
}

426 427 428 429 430 431 432 433 434
/* Read random bytes:

   - Return 0 on success
   - Raise an exception (if raise is non-zero) and return -1 on error

   Used sources of entropy ordered by preference, preferred source first:

   - CryptGenRandom() on Windows
   - getrandom() function (ex: Linux and Solaris): call py_getrandom()
435
   - getentropy() function (ex: OpenBSD): call py_getentropy()
436 437 438 439 440
   - /dev/urandom device

   Read from the /dev/urandom device if getrandom() or getentropy() function
   is not available or does not work.

441 442 443 444 445
   Prefer getrandom() over getentropy() because getrandom() supports blocking
   and non-blocking mode: see the PEP 524. Python requires non-blocking RNG at
   startup to initialize its hash secret, but os.urandom() must block until the
   system urandom is initialized (at least on Linux 3.17 and newer).

446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468
   Prefer getrandom() and getentropy() over reading directly /dev/urandom
   because these functions don't need file descriptors and so avoid ENFILE or
   EMFILE errors (too many open files): see the issue #18756.

   Only the getrandom() function supports non-blocking mode.

   Only use RNG running in the kernel. They are more secure because it is
   harder to get the internal state of a RNG running in the kernel land than a
   RNG running in the user land. The kernel has a direct access to the hardware
   and has access to hardware RNG, they are used as entropy sources.

   Note: the OpenSSL RAND_pseudo_bytes() function does not automatically reseed
   its RNG on fork(), two child processes (with the same pid) generate the same
   random numbers: see issue #18747. Kernel RNGs don't have this issue,
   they have access to good quality entropy sources.

   If raise is zero:

   - Don't raise an exception on error
   - Don't call the Python signal handler (don't call PyErr_CheckSignals()) if
     a function fails with EINTR: retry directly the interrupted function
   - Don't release the GIL to call functions.
*/
469
static int
470
pyurandom(void *buffer, Py_ssize_t size, int blocking, int raise)
471
{
472 473 474 475
#if defined(PY_GETRANDOM) || defined(PY_GETENTROPY)
    int res;
#endif

476
    if (size < 0) {
477 478 479 480
        if (raise) {
            PyErr_Format(PyExc_ValueError,
                         "negative argument not allowed");
        }
481 482
        return -1;
    }
483 484

    if (size == 0) {
485
        return 0;
486
    }
487 488

#ifdef MS_WINDOWS
489
    return win32_urandom((unsigned char *)buffer, size, raise);
490
#else
491 492

#if defined(PY_GETRANDOM) || defined(PY_GETENTROPY)
493
#ifdef PY_GETRANDOM
494
    res = py_getrandom(buffer, size, blocking, raise);
495 496
#else
    res = py_getentropy(buffer, size, raise);
497 498 499 500 501 502 503 504 505 506 507 508
#endif
    if (res < 0) {
        return -1;
    }
    if (res == 1) {
        return 0;
    }
    /* getrandom() or getentropy() function is not available: failed with
       ENOSYS or EPERM. Fall back on reading from /dev/urandom. */
#endif

    return dev_urandom(buffer, size, raise);
509 510 511
#endif
}

512 513 514 515
/* Fill buffer with size pseudo-random bytes from the operating system random
   number generator (RNG). It is suitable for most cryptographic purposes
   except long living private keys for asymmetric encryption.

516 517 518 519 520
   On Linux 3.17 and newer, the getrandom() syscall is used in blocking mode:
   block until the system urandom entropy pool is initialized (128 bits are
   collected by the kernel).

   Return 0 on success. Raise an exception and return -1 on error. */
521 522 523
int
_PyOS_URandom(void *buffer, Py_ssize_t size)
{
524 525 526 527 528 529 530 531 532 533 534 535 536 537 538
    return pyurandom(buffer, size, 1, 1);
}

/* Fill buffer with size pseudo-random bytes from the operating system random
   number generator (RNG). It is not suitable for cryptographic purpose.

   Calls pyurandom() with blocking=0 and raise=1.

   On Linux 3.17 and newer (when getrandom() syscall is used), if the system
   urandom is not initialized yet, the function is described as returning
   "weak" entropy read from /dev/urandom.
   NOTE(review): py_getrandom() only falls back to /dev/urandom on EAGAIN when
   raise is zero; since raise=1 is passed here, that case appears to be
   reported as an OSError with a -1 return instead. Confirm which behaviour is
   intended (see the PEP 524) and make sure callers handle the error.

   Return 0 on success. Raise an exception and return -1 on error. */
int
_PyOS_URandomNonblock(void *buffer, Py_ssize_t size)
{
    return pyurandom(buffer, size, 0, 1);
}

541 542 543 544
int
_Py_ReadHashSeed(const char *seed_text,
                 int *use_hash_seed,
                 unsigned long *hash_seed)
545
{
546
    Py_BUILD_ASSERT(sizeof(_Py_HashSecret_t) == sizeof(_Py_HashSecret.uc));
547 548
    /* Convert a text seed to a numeric one */
    if (seed_text && *seed_text != '\0' && strcmp(seed_text, "random") != 0) {
549
        const char *endptr = seed_text;
550
        unsigned long seed;
551
        seed = strtoul(seed_text, (char **)&endptr, 10);
552 553 554 555
        if (*endptr != '\0'
            || seed > 4294967295UL
            || (errno == ERANGE && seed == ULONG_MAX))
        {
556
            return -1;
557
        }
558 559 560 561 562 563 564 565 566 567 568 569
        /* Use a specific hash */
        *use_hash_seed = 1;
        *hash_seed = seed;
    }
    else {
        /* Use a random hash */
        *use_hash_seed = 0;
        *hash_seed = 0;
    }
    return 0;
}

570 571 572

_PyInitError
_Py_HashRandomization_Init(const _PyCoreConfig *config)
573 574 575 576
{
    void *secret = &_Py_HashSecret;
    Py_ssize_t secret_size = sizeof(_Py_HashSecret_t);

577 578 579
    if (_Py_HashSecret_Initialized) {
        return _Py_INIT_OK();
    }
580 581
    _Py_HashSecret_Initialized = 1;

582 583
    if (config->use_hash_seed) {
        if (config->hash_seed == 0) {
584 585 586 587
            /* disable the randomized hash */
            memset(secret, 0, secret_size);
        }
        else {
588
            /* use the specified hash seed */
589
            lcg_urandom(config->hash_seed, secret, secret_size);
590 591 592
        }
    }
    else {
593
        /* use a random hash seed */
594 595 596
        int res;

        /* _PyRandom_Init() is called very early in the Python initialization
597 598 599 600 601
           and so exceptions cannot be used (use raise=0).

           _PyRandom_Init() must not block Python initialization: call
           pyurandom() is non-blocking mode (blocking=0): see the PEP 524. */
        res = pyurandom(secret, secret_size, 0, 0);
602
        if (res < 0) {
603 604
            return _Py_INIT_USER_ERR("failed to get random numbers "
                                     "to initialize Python");
605
        }
606
    }
607
    return _Py_INIT_OK();
608
}
609

610 611 612

/* Release the OS resources held by this file:

   - Windows: release the CryptoAPI context stored in hCryptProv, if any,
     and reset the handle to 0
   - other platforms: close the cached /dev/urandom file descriptor via
     dev_urandom_close() */
void
_Py_HashRandomization_Fini(void)
{
#ifdef MS_WINDOWS
    if (hCryptProv) {
        CryptReleaseContext(hCryptProv, 0);
        hCryptProv = 0;
    }
#else
    dev_urandom_close();
#endif
}