bootstrap_hash.c 17.3 KB
Newer Older
1 2
#include "Python.h"
#ifdef MS_WINDOWS
3
#  include <windows.h>
4 5 6
/* All sample MSDN wincrypt programs include the header below. It is at least
 * required with Min GW. */
#  include <wincrypt.h>
7
#else
8 9 10 11
#  include <fcntl.h>
#  ifdef HAVE_SYS_STAT_H
#    include <sys/stat.h>
#  endif
12 13 14
#  ifdef HAVE_LINUX_RANDOM_H
#    include <linux/random.h>
#  endif
15
#  if defined(HAVE_SYS_RANDOM_H) && (defined(HAVE_GETRANDOM) || defined(HAVE_GETENTROPY))
16
#    include <sys/random.h>
17 18
#  endif
#  if !defined(HAVE_GETRANDOM) && defined(HAVE_GETRANDOM_SYSCALL)
19 20
#    include <sys/syscall.h>
#  endif
21 22
#endif

23
#ifdef _Py_MEMORY_SANITIZER
24 25 26
#  include <sanitizer/msan_interface.h>
#endif

27 28 29 30 31
/* Flag set to 1 by _Py_HashRandomization_Init(), which returns immediately
   when the flag is already set. With Py_DEBUG defined the flag has external
   linkage; otherwise it is private to this file. */
#ifdef Py_DEBUG
int _Py_HashSecret_Initialized = 0;
#else
static int _Py_HashSecret_Initialized = 0;
#endif
32 33 34 35 36 37 38 39

#ifdef MS_WINDOWS
static HCRYPTPROV hCryptProv = 0;

static int
win32_urandom_init(int raise)
{
    /* Acquire context */
40 41
    if (!CryptAcquireContext(&hCryptProv, NULL, NULL,
                             PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
42 43 44 45 46
        goto error;

    return 0;

error:
47
    if (raise) {
48
        PyErr_SetFromWindowsErr(0);
49
    }
50 51 52 53
    return -1;
}

/* Fill buffer with size pseudo-random bytes generated by the Windows CryptoGen
54
   API. Return 0 on success, or raise an exception and return -1 on error. */
55 56 57 58 59
static int
win32_urandom(unsigned char *buffer, Py_ssize_t size, int raise)
{
    if (hCryptProv == 0)
    {
60
        if (win32_urandom_init(raise) == -1) {
61
            return -1;
62
        }
63 64 65 66
    }

    while (size > 0)
    {
67 68
        DWORD chunk = (DWORD)Py_MIN(size, PY_DWORD_MAX);
        if (!CryptGenRandom(hCryptProv, chunk, buffer))
69 70
        {
            /* CryptGenRandom() failed */
71
            if (raise) {
72
                PyErr_SetFromWindowsErr(0);
73
            }
74 75 76 77 78 79 80 81
            return -1;
        }
        buffer += chunk;
        size -= chunk;
    }
    return 0;
}

82 83
#else /* !MS_WINDOWS */

84
#if defined(HAVE_GETRANDOM) || defined(HAVE_GETRANDOM_SYSCALL)
85
#define PY_GETRANDOM 1
86

87 88
/* Call getrandom() to get random bytes:

89
   - Return 1 on success
90 91 92
   - Return 0 if getrandom() is not available (failed with ENOSYS or EPERM),
     or if getrandom(GRND_NONBLOCK) failed with EAGAIN (system urandom not
     initialized yet) and raise=0.
93
   - Raise an exception (if raise is non-zero) and return -1 on error:
94 95 96 97 98
     if getrandom() failed with EINTR, raise is non-zero and the Python signal
     handler raised an exception, or if getrandom() failed with a different
     error.

   getrandom() is retried if it failed with EINTR: interrupted by a signal. */
99
static int
100
py_getrandom(void *buffer, Py_ssize_t size, int blocking, int raise)
101
{
102
    /* Is getrandom() supported by the running kernel? Set to 0 if getrandom()
103
       failed with ENOSYS or EPERM. Need Linux kernel 3.17 or newer, or Solaris
Victor Stinner's avatar
Victor Stinner committed
104
       11.3 or newer */
105
    static int getrandom_works = 1;
106
    int flags;
107
    char *dest;
108
    long n;
109

110
    if (!getrandom_works) {
111
        return 0;
112
    }
113

114
    flags = blocking ? 0 : GRND_NONBLOCK;
115
    dest = buffer;
116
    while (0 < size) {
117 118
#ifdef sun
        /* Issue #26735: On Solaris, getrandom() is limited to returning up
119 120
           to 1024 bytes. Call it multiple times if more bytes are
           requested. */
121 122
        n = Py_MIN(size, 1024);
#else
123
        n = Py_MIN(size, LONG_MAX);
124
#endif
125

126
        errno = 0;
127 128 129
#ifdef HAVE_GETRANDOM
        if (raise) {
            Py_BEGIN_ALLOW_THREADS
130
            n = getrandom(dest, n, flags);
131 132 133
            Py_END_ALLOW_THREADS
        }
        else {
134
            n = getrandom(dest, n, flags);
135 136 137
        }
#else
        /* On Linux, use the syscall() function because the GNU libc doesn't
138 139
           expose the Linux getrandom() syscall yet. See:
           https://sourceware.org/bugzilla/show_bug.cgi?id=17252 */
140 141
        if (raise) {
            Py_BEGIN_ALLOW_THREADS
142
            n = syscall(SYS_getrandom, dest, n, flags);
143 144 145
            Py_END_ALLOW_THREADS
        }
        else {
146
            n = syscall(SYS_getrandom, dest, n, flags);
147
        }
148
#  ifdef _Py_MEMORY_SANITIZER
149 150 151 152
        if (n > 0) {
             __msan_unpoison(dest, n);
        }
#  endif
153
#endif
154

155
        if (n < 0) {
156 157 158
            /* ENOSYS: the syscall is not supported by the kernel.
               EPERM: the syscall is blocked by a security policy (ex: SECCOMP)
               or something else. */
159
            if (errno == ENOSYS || errno == EPERM) {
160 161 162
                getrandom_works = 0;
                return 0;
            }
163 164

            /* getrandom(GRND_NONBLOCK) fails with EAGAIN if the system urandom
165
               is not initialiazed yet. For _PyRandom_Init(), we ignore the
166
               error and fall back on reading /dev/urandom which never blocks,
167 168
               even if the system urandom is not initialized yet:
               see the PEP 524. */
169
            if (errno == EAGAIN && !raise && !blocking) {
170 171
                return 0;
            }
172 173

            if (errno == EINTR) {
174 175 176 177
                if (raise) {
                    if (PyErr_CheckSignals()) {
                        return -1;
                    }
178
                }
179 180

                /* retry getrandom() if it was interrupted by a signal */
181 182 183
                continue;
            }

184
            if (raise) {
185
                PyErr_SetFromErrno(PyExc_OSError);
186
            }
187 188 189
            return -1;
        }

190
        dest += n;
191 192 193 194
        size -= n;
    }
    return 1;
}
195 196 197 198

#elif defined(HAVE_GETENTROPY)
#define PY_GETENTROPY 1

199
/* Fill buffer with size pseudo-random bytes generated by getentropy():
200

201 202 203 204 205 206 207 208 209
   - Return 1 on success
   - Return 0 if getentropy() syscall is not available (failed with ENOSYS or
     EPERM).
   - Raise an exception (if raise is non-zero) and return -1 on error:
     if getentropy() failed with EINTR, raise is non-zero and the Python signal
     handler raised an exception, or if getentropy() failed with a different
     error.

   getentropy() is retried if it failed with EINTR: interrupted by a signal. */
210 211 212
static int
py_getentropy(char *buffer, Py_ssize_t size, int raise)
{
213 214 215 216 217 218 219 220
    /* Is getentropy() supported by the running kernel? Set to 0 if
       getentropy() failed with ENOSYS or EPERM. */
    static int getentropy_works = 1;

    if (!getentropy_works) {
        return 0;
    }

221
    while (size > 0) {
222 223
        /* getentropy() is limited to returning up to 256 bytes. Call it
           multiple times if more bytes are requested. */
224 225 226 227 228 229 230 231 232 233 234 235 236
        Py_ssize_t len = Py_MIN(size, 256);
        int res;

        if (raise) {
            Py_BEGIN_ALLOW_THREADS
            res = getentropy(buffer, len);
            Py_END_ALLOW_THREADS
        }
        else {
            res = getentropy(buffer, len);
        }

        if (res < 0) {
237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
            /* ENOSYS: the syscall is not supported by the running kernel.
               EPERM: the syscall is blocked by a security policy (ex: SECCOMP)
               or something else. */
            if (errno == ENOSYS || errno == EPERM) {
                getentropy_works = 0;
                return 0;
            }

            if (errno == EINTR) {
                if (raise) {
                    if (PyErr_CheckSignals()) {
                        return -1;
                    }
                }

                /* retry getentropy() if it was interrupted by a signal */
                continue;
            }

256 257 258 259 260 261 262 263 264 265 266 267
            if (raise) {
                PyErr_SetFromErrno(PyExc_OSError);
            }
            return -1;
        }

        buffer += len;
        size -= len;
    }
    return 1;
}
#endif /* HAVE_GETRANDOM || HAVE_GETRANDOM_SYSCALL, else HAVE_GETENTROPY */
268

269

270 271 272 273 274
/* Cached /dev/urandom file descriptor shared by dev_urandom() and
   dev_urandom_close(), together with the device and inode numbers that
   fstat() reported for it when it was cached. */
static struct {
    int fd;         /* cached file descriptor, -1 when nothing is cached */
    dev_t st_dev;   /* st_dev of the cached descriptor */
    ino_t st_ino;   /* st_ino of the cached descriptor */
} urandom_cache = { -1 };
275

276
/* Read random bytes from the /dev/urandom device:
277

278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302
   - Return 0 on success
   - Raise an exception (if raise is non-zero) and return -1 on error

   Possible causes of errors:

   - open() failed with ENOENT, ENXIO, ENODEV, EACCES: the /dev/urandom device
     was not found. For example, it was removed manually or not exposed in a
     chroot or container.
   - open() failed with a different error
   - fstat() failed
   - read() failed or returned 0

   read() is retried if it failed with EINTR: interrupted by a signal.

   The file descriptor of the device is kept open between calls to avoid using
   many file descriptors when run in parallel from multiple threads:
   see the issue #18756.

   st_dev and st_ino fields of the file descriptor (from fstat()) are cached to
   check if the file descriptor was replaced by a different file (which is
   likely a bug in the application): see the issue #21207.

   If the file descriptor was closed or replaced, open a new file descriptor
   but don't close the old file descriptor: it probably points to something
   important for some third-party code. */
303
static int
304
dev_urandom(char *buffer, Py_ssize_t size, int raise)
305 306 307
{
    int fd;
    Py_ssize_t n;
308 309 310

    if (raise) {
        struct _Py_stat_struct st;
311
        int fstat_result;
312

313
        if (urandom_cache.fd >= 0) {
314 315 316 317
            Py_BEGIN_ALLOW_THREADS
            fstat_result = _Py_fstat_noraise(urandom_cache.fd, &st);
            Py_END_ALLOW_THREADS

318
            /* Does the fd point to the same thing as before? (issue #21207) */
319
            if (fstat_result
320 321 322 323 324 325 326
                || st.st_dev != urandom_cache.st_dev
                || st.st_ino != urandom_cache.st_ino) {
                /* Something changed: forget the cached fd (but don't close it,
                   since it probably points to something important for some
                   third-party code). */
                urandom_cache.fd = -1;
            }
327
        }
328 329
        if (urandom_cache.fd >= 0)
            fd = urandom_cache.fd;
330
        else {
331 332 333
            fd = _Py_open("/dev/urandom", O_RDONLY);
            if (fd < 0) {
                if (errno == ENOENT || errno == ENXIO ||
334
                    errno == ENODEV || errno == EACCES) {
335 336
                    PyErr_SetString(PyExc_NotImplementedError,
                                    "/dev/urandom (or equivalent) not found");
337
                }
338
                /* otherwise, keep the OSError exception raised by _Py_open() */
339 340
                return -1;
            }
341 342 343 344 345 346
            if (urandom_cache.fd >= 0) {
                /* urandom_fd was initialized by another thread while we were
                   not holding the GIL, keep it. */
                close(fd);
                fd = urandom_cache.fd;
            }
347
            else {
348 349 350 351 352 353 354 355 356
                if (_Py_fstat(fd, &st)) {
                    close(fd);
                    return -1;
                }
                else {
                    urandom_cache.fd = fd;
                    urandom_cache.st_dev = st.st_dev;
                    urandom_cache.st_ino = st.st_ino;
                }
357
            }
358
        }
359

360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377
        do {
            n = _Py_read(fd, buffer, (size_t)size);
            if (n == -1)
                return -1;
            if (n == 0) {
                PyErr_Format(PyExc_RuntimeError,
                        "Failed to read %zi bytes from /dev/urandom",
                        size);
                return -1;
            }

            buffer += n;
            size -= n;
        } while (0 < size);
    }
    else {
        fd = _Py_open_noraise("/dev/urandom", O_RDONLY);
        if (fd < 0) {
378 379 380
            return -1;
        }

381 382 383 384 385
        while (0 < size)
        {
            do {
                n = read(fd, buffer, (size_t)size);
            } while (n < 0 && errno == EINTR);
386

387 388
            if (n <= 0) {
                /* stop on error or if read(size) returned 0 */
389
                close(fd);
390 391 392 393 394 395 396 397
                return -1;
            }

            buffer += n;
            size -= n;
        }
        close(fd);
    }
398 399
    return 0;
}
400 401 402 403

static void
dev_urandom_close(void)
{
404 405 406
    if (urandom_cache.fd >= 0) {
        close(urandom_cache.fd);
        urandom_cache.fd = -1;
407 408
    }
}
409
#endif /* !MS_WINDOWS */
410

411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432

/* Fill buffer with size pseudo-random bytes produced by a linear
   congruential generator (LCG) seeded with x0:

       x(n+1) = (x(n) * 214013 + 2531011) % 2^32

   Each output byte is made of bits 23..16 of the freshly computed state.
   Nothing is written when size is 0. */
static void
lcg_urandom(unsigned int x0, unsigned char *buffer, size_t size)
{
    unsigned int state = x0;
    unsigned char *out = buffer;
    size_t left;

    for (left = size; left > 0; left--) {
        /* unsigned arithmetic wraps modulo 2 ^ (8 * sizeof(int)) */
        state = state * 214013 + 2531011;
        *out++ = (state >> 16) & 0xff;
    }
}

433 434 435 436 437 438 439 440 441
/* Read random bytes:

   - Return 0 on success
   - Raise an exception (if raise is non-zero) and return -1 on error

   Used sources of entropy ordered by preference, preferred source first:

   - CryptGenRandom() on Windows
   - getrandom() function (ex: Linux and Solaris): call py_getrandom()
442
   - getentropy() function (ex: OpenBSD): call py_getentropy()
443 444 445 446 447
   - /dev/urandom device

   Read from the /dev/urandom device if getrandom() or getentropy() function
   is not available or does not work.

448 449 450 451 452
   Prefer getrandom() over getentropy() because getrandom() supports blocking
   and non-blocking mode: see the PEP 524. Python requires non-blocking RNG at
   startup to initialize its hash secret, but os.urandom() must block until the
   system urandom is initialized (at least on Linux 3.17 and newer).

453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475
   Prefer getrandom() and getentropy() over reading directly /dev/urandom
   because these functions don't need file descriptors and so avoid ENFILE or
   EMFILE errors (too many open files): see the issue #18756.

   Only the getrandom() function supports non-blocking mode.

   Only use RNG running in the kernel. They are more secure because it is
   harder to get the internal state of a RNG running in the kernel land than a
   RNG running in the user land. The kernel has a direct access to the hardware
   and has access to hardware RNG, they are used as entropy sources.

   Note: the OpenSSL RAND_pseudo_bytes() function does not automatically reseed
   its RNG on fork(), two child processes (with the same pid) generate the same
   random numbers: see issue #18747. Kernel RNGs don't have this issue,
   they have access to good quality entropy sources.

   If raise is zero:

   - Don't raise an exception on error
   - Don't call the Python signal handler (don't call PyErr_CheckSignals()) if
     a function fails with EINTR: retry directly the interrupted function
   - Don't release the GIL to call functions.
*/
476
static int
477
pyurandom(void *buffer, Py_ssize_t size, int blocking, int raise)
478
{
479 480 481 482
#if defined(PY_GETRANDOM) || defined(PY_GETENTROPY)
    int res;
#endif

483
    if (size < 0) {
484 485 486 487
        if (raise) {
            PyErr_Format(PyExc_ValueError,
                         "negative argument not allowed");
        }
488 489
        return -1;
    }
490 491

    if (size == 0) {
492
        return 0;
493
    }
494 495

#ifdef MS_WINDOWS
496
    return win32_urandom((unsigned char *)buffer, size, raise);
497
#else
498 499

#if defined(PY_GETRANDOM) || defined(PY_GETENTROPY)
500
#ifdef PY_GETRANDOM
501
    res = py_getrandom(buffer, size, blocking, raise);
502 503
#else
    res = py_getentropy(buffer, size, raise);
504 505 506 507 508 509 510 511 512 513 514 515
#endif
    if (res < 0) {
        return -1;
    }
    if (res == 1) {
        return 0;
    }
    /* getrandom() or getentropy() function is not available: failed with
       ENOSYS or EPERM. Fall back on reading from /dev/urandom. */
#endif

    return dev_urandom(buffer, size, raise);
516 517 518
#endif
}

519 520 521 522
/* Fill buffer with size pseudo-random bytes from the operating system random
   number generator (RNG). It is suitable for most cryptographic purposes
   except long living private keys for asymmetric encryption.

523 524 525 526 527
   On Linux 3.17 and newer, the getrandom() syscall is used in blocking mode:
   block until the system urandom entropy pool is initialized (128 bits are
   collected by the kernel).

   Return 0 on success. Raise an exception and return -1 on error. */
528 529 530
int
_PyOS_URandom(void *buffer, Py_ssize_t size)
{
531 532 533 534 535 536 537 538 539 540 541 542 543 544 545
    return pyurandom(buffer, size, 1, 1);
}

/* Fill buffer with size pseudo-random bytes from the operating system random
   number generator (RNG), without blocking. The bytes are not suitable for
   cryptographic purpose.

   On Linux 3.17 and newer (when getrandom() syscall is used), if the system
   urandom is not initialized yet, the function returns "weak" entropy read
   from /dev/urandom.

   Return 0 on success. Raise an exception and return -1 on error. */
int
_PyOS_URandomNonblock(void *buffer, Py_ssize_t size)
{
    const int blocking = 0;
    const int raise_exc = 1;

    return pyurandom(buffer, size, blocking, raise_exc);
}

548 549 550

_PyInitError
_Py_HashRandomization_Init(const _PyCoreConfig *config)
551 552 553 554
{
    void *secret = &_Py_HashSecret;
    Py_ssize_t secret_size = sizeof(_Py_HashSecret_t);

555 556 557
    if (_Py_HashSecret_Initialized) {
        return _Py_INIT_OK();
    }
558 559
    _Py_HashSecret_Initialized = 1;

560 561
    if (config->use_hash_seed) {
        if (config->hash_seed == 0) {
562 563 564 565
            /* disable the randomized hash */
            memset(secret, 0, secret_size);
        }
        else {
566
            /* use the specified hash seed */
567
            lcg_urandom(config->hash_seed, secret, secret_size);
568 569 570
        }
    }
    else {
571
        /* use a random hash seed */
572 573 574
        int res;

        /* _PyRandom_Init() is called very early in the Python initialization
575 576 577 578 579
           and so exceptions cannot be used (use raise=0).

           _PyRandom_Init() must not block Python initialization: call
           pyurandom() is non-blocking mode (blocking=0): see the PEP 524. */
        res = pyurandom(secret, secret_size, 0, 0);
580
        if (res < 0) {
581 582
            return _Py_INIT_USER_ERR("failed to get random numbers "
                                     "to initialize Python");
583
        }
584
    }
585
    return _Py_INIT_OK();
586
}
587

588 589 590

/* Release the resources held by the random number sources: close the cached
   /dev/urandom file descriptor, or on Windows release the cryptographic
   provider context if one was acquired. */
void
_Py_HashRandomization_Fini(void)
{
#ifndef MS_WINDOWS
    dev_urandom_close();
#else
    if (hCryptProv != 0) {
        CryptReleaseContext(hCryptProv, 0);
        hCryptProv = 0;
    }
#endif
}