Unverified commit 9ed83c40 authored by Victor Stinner, committed by GitHub

bpo-18835: Cleanup pymalloc (#4200)

Cleanup pymalloc:

* Rename _PyObject_Alloc() to pymalloc_alloc()
* Rename _PyObject_FreeImpl() to pymalloc_free()
* Rename _PyObject_Realloc() to pymalloc_realloc()
* pymalloc_alloc() and pymalloc_realloc() no longer fall back to the raw
  allocator; the caller must now do it (see the sketch after this list)
* Add "success" and "failed" labels to pymalloc_alloc() and
  pymalloc_free()
* pymalloc_alloc() and pymalloc_free() no longer update
  num_allocated_blocks: it must now be done by the caller
* _PyObject_Calloc() is now responsible for filling the memory block
  allocated by pymalloc with zeros
* Simplify pymalloc_alloc() prototype
* _PyObject_Realloc() now calls _PyObject_Malloc() rather than calling
  pymalloc_alloc() directly
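
To make the new division of labor concrete, here is a minimal sketch of the
caller-side pattern (a sketch only: object_malloc_sketch is a hypothetical
name; the real wrapper in this commit is _PyObject_Malloc(), shown in the
diff below):

static void *
object_malloc_sketch(void *ctx, size_t nbytes)
{
    void *ptr;

    /* pymalloc_alloc() now only reports success or failure... */
    if (pymalloc_alloc(ctx, &ptr, nbytes)) {
        _PyRuntime.mem.num_allocated_blocks++;
        return ptr;
    }

    /* ...so the raw-allocator fallback and the block accounting both
       live in the caller. */
    ptr = PyMem_RawMalloc(nbytes);
    if (ptr != NULL) {
        _PyRuntime.mem.num_allocated_blocks++;
    }
    return ptr;
}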

_PyMem_DebugRawAlloc() and _PyMem_DebugRawRealloc():

* document the layout of a memory block
* don't increase the serial number if the allocation failed
* check for integer overflow before computing the total size
* add a 'data' variable to make the code easier to follow (a layout sketch
  follows below)
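
The layout and the overflow check can be illustrated with a standalone sketch
(assumptions: debug_wrap_sketch and write_size_t_sketch are hypothetical
stand-ins for _PyMem_DebugRawAlloc() and obmalloc.c's write_size_t(); the
marker byte value and the SIZE_MAX / 2 bound are stand-ins as well):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define SST sizeof(size_t)
#define FORBIDDENBYTE 0xFB   /* stand-in marker byte; obmalloc.c defines its own */

/* Simplified stand-in for obmalloc.c's write_size_t(). */
static void write_size_t_sketch(void *p, size_t n)
{
    memcpy(p, &n, SST);
}

/* Layout documented by this commit: [SSSS IFFF CCCC...CCCC FFFF NNNN]
                                      ^--- p    ^--- data    ^--- tail
   S: requested size, I: API id, F: forbidden bytes, C: caller data,
   N: serial number. */
static void *
debug_wrap_sketch(size_t nbytes, uint8_t api_id, size_t serialno)
{
    uint8_t *p, *data, *tail;

    if (nbytes > SIZE_MAX / 2 - 4 * SST) {
        /* overflow check done before computing the total size
           (SIZE_MAX / 2 plays the role of PY_SSIZE_T_MAX here) */
        return NULL;
    }
    p = malloc(nbytes + 4 * SST);
    if (p == NULL) {
        return NULL;                         /* serial number is not bumped on failure */
    }
    data = p + 2 * SST;
    tail = data + nbytes;
    write_size_t_sketch(p, nbytes);              /* S: size header */
    p[SST] = api_id;                             /* I: API identifier */
    memset(p + SST + 1, FORBIDDENBYTE, SST - 1); /* F: leading pad */
    memset(tail, FORBIDDENBYTE, SST);            /* F: trailing pad */
    write_size_t_sketch(tail + SST, serialno);   /* N: serial number */
    return data;                                 /* the caller only ever sees 'data' */
}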

test_setallocators() in _testcapimodule.c now also tests that the allocator context is passed through correctly.
parent ec2cbdd1
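
For context, here is a minimal sketch of the hooking pattern the test
exercises (hook_t and the *_sketch names are illustrative, not the test's
actual identifiers; only the raw domain is shown):

#include <Python.h>

typedef struct {
    PyMemAllocatorEx orig;   /* previous allocator, restored at the end */
    void *seen_ctx;          /* ctx value received by the malloc hook */
    size_t malloc_size;
} hook_t;

static void *hook_malloc_sketch(void *ctx, size_t size)
{
    hook_t *h = (hook_t *)ctx;
    h->seen_ctx = ctx;       /* must equal &hook: this is what the test now checks */
    h->malloc_size = size;
    return h->orig.malloc(h->orig.ctx, size);
}

/* Pass-through hooks so the installed allocator stays fully functional. */
static void *hook_calloc_sketch(void *ctx, size_t ne, size_t sz)
{ hook_t *h = (hook_t *)ctx; return h->orig.calloc(h->orig.ctx, ne, sz); }
static void *hook_realloc_sketch(void *ctx, void *p, size_t sz)
{ hook_t *h = (hook_t *)ctx; return h->orig.realloc(h->orig.ctx, p, sz); }
static void hook_free_sketch(void *ctx, void *p)
{ hook_t *h = (hook_t *)ctx; h->orig.free(h->orig.ctx, p); }

/* Usage sketch: returns 0 if the context was passed through to the hook. */
static int check_ctx_sketch(void)
{
    hook_t hook;
    PyMemAllocatorEx alloc = {&hook, hook_malloc_sketch, hook_calloc_sketch,
                              hook_realloc_sketch, hook_free_sketch};

    hook.seen_ctx = NULL;
    PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &hook.orig);
    PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);

    void *p = PyMem_RawMalloc(42);

    PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &hook.orig);   /* restore before freeing */
    PyMem_RawFree(p);
    return (p != NULL && hook.seen_ctx == &hook) ? 0 : -1;
}
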
@@ -3273,34 +3273,39 @@ typedef struct {
void *realloc_ptr;
size_t realloc_new_size;
void *free_ptr;
void *ctx;
} alloc_hook_t;
static void* hook_malloc (void* ctx, size_t size)
static void* hook_malloc(void* ctx, size_t size)
{
alloc_hook_t *hook = (alloc_hook_t *)ctx;
hook->ctx = ctx;
hook->malloc_size = size;
return hook->alloc.malloc(hook->alloc.ctx, size);
}
static void* hook_calloc (void* ctx, size_t nelem, size_t elsize)
static void* hook_calloc(void* ctx, size_t nelem, size_t elsize)
{
alloc_hook_t *hook = (alloc_hook_t *)ctx;
hook->ctx = ctx;
hook->calloc_nelem = nelem;
hook->calloc_elsize = elsize;
return hook->alloc.calloc(hook->alloc.ctx, nelem, elsize);
}
static void* hook_realloc (void* ctx, void* ptr, size_t new_size)
static void* hook_realloc(void* ctx, void* ptr, size_t new_size)
{
alloc_hook_t *hook = (alloc_hook_t *)ctx;
hook->ctx = ctx;
hook->realloc_ptr = ptr;
hook->realloc_new_size = new_size;
return hook->alloc.realloc(hook->alloc.ctx, ptr, new_size);
}
static void hook_free (void *ctx, void *ptr)
static void hook_free(void *ctx, void *ptr)
{
alloc_hook_t *hook = (alloc_hook_t *)ctx;
hook->ctx = ctx;
hook->free_ptr = ptr;
hook->alloc.free(hook->alloc.ctx, ptr);
}
@@ -3325,7 +3330,9 @@ test_setallocators(PyMemAllocatorDomain domain)
PyMem_GetAllocator(domain, &hook.alloc);
PyMem_SetAllocator(domain, &alloc);
/* malloc, realloc, free */
size = 42;
hook.ctx = NULL;
switch(domain)
{
case PYMEM_DOMAIN_RAW: ptr = PyMem_RawMalloc(size); break;
@@ -3334,11 +3341,18 @@ test_setallocators(PyMemAllocatorDomain domain)
default: ptr = NULL; break;
}
#define CHECK_CTX(FUNC) \
if (hook.ctx != &hook) { \
error_msg = FUNC " wrong context"; \
goto fail; \
} \
hook.ctx = NULL; /* reset for next check */
if (ptr == NULL) {
error_msg = "malloc failed";
goto fail;
}
CHECK_CTX("malloc");
if (hook.malloc_size != size) {
error_msg = "malloc invalid size";
goto fail;
@@ -3357,7 +3371,7 @@ test_setallocators(PyMemAllocatorDomain domain)
error_msg = "realloc failed";
goto fail;
}
CHECK_CTX("realloc");
if (hook.realloc_ptr != ptr
|| hook.realloc_new_size != size2) {
error_msg = "realloc invalid parameters";
@@ -3371,11 +3385,13 @@ test_setallocators(PyMemAllocatorDomain domain)
case PYMEM_DOMAIN_OBJ: PyObject_Free(ptr2); break;
}
CHECK_CTX("free");
if (hook.free_ptr != ptr2) {
error_msg = "free invalid pointer";
goto fail;
}
/* calloc, free */
nelem = 2;
elsize = 5;
switch(domain)
@@ -3390,12 +3406,13 @@ test_setallocators(PyMemAllocatorDomain domain)
error_msg = "calloc failed";
goto fail;
}
CHECK_CTX("calloc");
if (hook.calloc_nelem != nelem || hook.calloc_elsize != elsize) {
error_msg = "calloc invalid nelem or elsize";
goto fail;
}
hook.free_ptr = NULL;
switch(domain)
{
case PYMEM_DOMAIN_RAW: PyMem_RawFree(ptr); break;
@@ -3403,6 +3420,12 @@ test_setallocators(PyMemAllocatorDomain domain)
case PYMEM_DOMAIN_OBJ: PyObject_Free(ptr); break;
}
CHECK_CTX("calloc free");
if (hook.free_ptr != ptr) {
error_msg = "calloc free invalid pointer";
goto fail;
}
Py_INCREF(Py_None);
res = Py_None;
goto finally;
@@ -3413,6 +3436,8 @@ fail:
finally:
PyMem_SetAllocator(domain, &hook.alloc);
return res;
#undef CHECK_CTX
}
static PyObject *
......
@@ -18,7 +18,7 @@ extern void _PyMem_DumpTraceback(int fd, const void *ptr);
static void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
static void _PyMem_DebugRawFree(void *ctx, void *p);
static void _PyMem_DebugRawFree(void *ctx, void *ptr);
static void* _PyMem_DebugMalloc(void *ctx, size_t size);
static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
@@ -444,6 +444,7 @@ PyMem_RawFree(void *ptr)
_PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
}
void *
PyMem_Malloc(size_t size)
{
@@ -477,6 +478,7 @@ PyMem_Free(void *ptr)
_PyMem.free(_PyMem.ctx, ptr);
}
char *
_PyMem_RawStrdup(const char *str)
{
@@ -556,6 +558,7 @@ PyObject_Free(void *ptr)
static int running_on_valgrind = -1;
#endif
Py_ssize_t
_Py_GetAllocatedBlocks(void)
{
@@ -661,6 +664,7 @@ new_arena(void)
return arenaobj;
}
/*
address_in_range(P, POOL)
@@ -750,442 +754,289 @@ address_in_range(void *p, poolp pool)
_PyRuntime.mem.arenas[arenaindex].address != 0;
}
/*==========================================================================*/
/* malloc. Note that nbytes==0 tries to return a non-NULL pointer, distinct
* from all other currently live pointers. This may not be possible.
*/
/* pymalloc allocator
/*
* The basic blocks are ordered by decreasing execution frequency,
* which minimizes the number of jumps in the most common cases,
* improves branching prediction and instruction scheduling (small
* block allocations typically result in a couple of instructions).
* Unless the optimizer reorders everything, being too smart...
*/
The basic blocks are ordered by decreasing execution frequency,
which minimizes the number of jumps in the most common cases,
improves branching prediction and instruction scheduling (small
block allocations typically result in a couple of instructions).
Unless the optimizer reorders everything, being too smart...
static void *
_PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
Return 1 if pymalloc allocated memory and wrote the pointer into *ptr_p.
Return 0 if pymalloc failed to allocate the memory block: on bigger
requests, on error in the code below (as a last chance to serve the request)
or when the max memory limit has been reached. */
static int
pymalloc_alloc(void *ctx, void **ptr_p, size_t nbytes)
{
size_t nbytes;
pyblock *bp;
poolp pool;
poolp next;
uint size;
_PyRuntime.mem.num_allocated_blocks++;
assert(elsize == 0 || nelem <= PY_SSIZE_T_MAX / elsize);
nbytes = nelem * elsize;
#ifdef WITH_VALGRIND
if (UNLIKELY(running_on_valgrind == -1))
if (UNLIKELY(running_on_valgrind == -1)) {
running_on_valgrind = RUNNING_ON_VALGRIND;
if (UNLIKELY(running_on_valgrind))
goto redirect;
}
if (UNLIKELY(running_on_valgrind)) {
return 0;
}
#endif
if (nelem == 0 || elsize == 0)
goto redirect;
if (nbytes == 0) {
return 0;
}
if (nbytes > SMALL_REQUEST_THRESHOLD) {
return 0;
}
if ((nbytes - 1) < SMALL_REQUEST_THRESHOLD) {
LOCK();
LOCK();
/*
* Most frequent paths first
*/
size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
pool = _PyRuntime.mem.usedpools[size + size];
if (pool != pool->nextpool) {
/*
* Most frequent paths first
* There is a used pool for this size class.
* Pick up the head block of its free list.
*/
size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
pool = _PyRuntime.mem.usedpools[size + size];
if (pool != pool->nextpool) {
/*
* There is a used pool for this size class.
* Pick up the head block of its free list.
*/
++pool->ref.count;
bp = pool->freeblock;
assert(bp != NULL);
if ((pool->freeblock = *(pyblock **)bp) != NULL) {
UNLOCK();
if (use_calloc)
memset(bp, 0, nbytes);
return (void *)bp;
}
/*
* Reached the end of the free list, try to extend it.
*/
if (pool->nextoffset <= pool->maxnextoffset) {
/* There is room for another block. */
pool->freeblock = (pyblock*)pool +
pool->nextoffset;
pool->nextoffset += INDEX2SIZE(size);
*(pyblock **)(pool->freeblock) = NULL;
UNLOCK();
if (use_calloc)
memset(bp, 0, nbytes);
return (void *)bp;
}
/* Pool is full, unlink from used pools. */
next = pool->nextpool;
pool = pool->prevpool;
next->prevpool = pool;
pool->nextpool = next;
UNLOCK();
if (use_calloc)
memset(bp, 0, nbytes);
return (void *)bp;
++pool->ref.count;
bp = pool->freeblock;
assert(bp != NULL);
if ((pool->freeblock = *(pyblock **)bp) != NULL) {
goto success;
}
/* There isn't a pool of the right size class immediately
* available: use a free pool.
/*
* Reached the end of the free list, try to extend it.
*/
if (_PyRuntime.mem.usable_arenas == NULL) {
/* No arena has a free pool: allocate a new arena. */
#ifdef WITH_MEMORY_LIMITS
if (_PyRuntime.mem.narenas_currently_allocated >= MAX_ARENAS) {
UNLOCK();
goto redirect;
}
#endif
_PyRuntime.mem.usable_arenas = new_arena();
if (_PyRuntime.mem.usable_arenas == NULL) {
UNLOCK();
goto redirect;
}
_PyRuntime.mem.usable_arenas->nextarena =
_PyRuntime.mem.usable_arenas->prevarena = NULL;
}
assert(_PyRuntime.mem.usable_arenas->address != 0);
/* Try to get a cached free pool. */
pool = _PyRuntime.mem.usable_arenas->freepools;
if (pool != NULL) {
/* Unlink from cached pools. */
_PyRuntime.mem.usable_arenas->freepools = pool->nextpool;
/* This arena already had the smallest nfreepools
* value, so decreasing nfreepools doesn't change
* that, and we don't need to rearrange the
* usable_arenas list. However, if the arena has
* become wholly allocated, we need to remove its
* arena_object from usable_arenas.
*/
--_PyRuntime.mem.usable_arenas->nfreepools;
if (_PyRuntime.mem.usable_arenas->nfreepools == 0) {
/* Wholly allocated: remove. */
assert(_PyRuntime.mem.usable_arenas->freepools == NULL);
assert(_PyRuntime.mem.usable_arenas->nextarena == NULL ||
_PyRuntime.mem.usable_arenas->nextarena->prevarena ==
_PyRuntime.mem.usable_arenas);
_PyRuntime.mem.usable_arenas = _PyRuntime.mem.usable_arenas->nextarena;
if (_PyRuntime.mem.usable_arenas != NULL) {
_PyRuntime.mem.usable_arenas->prevarena = NULL;
assert(_PyRuntime.mem.usable_arenas->address != 0);
}
}
else {
/* nfreepools > 0: it must be that freepools
* isn't NULL, or that we haven't yet carved
* off all the arena's pools for the first
* time.
*/
assert(_PyRuntime.mem.usable_arenas->freepools != NULL ||
_PyRuntime.mem.usable_arenas->pool_address <=
(pyblock*)_PyRuntime.mem.usable_arenas->address +
ARENA_SIZE - POOL_SIZE);
}
init_pool:
/* Frontlink to used pools. */
next = _PyRuntime.mem.usedpools[size + size]; /* == prev */
pool->nextpool = next;
pool->prevpool = next;
next->nextpool = pool;
next->prevpool = pool;
pool->ref.count = 1;
if (pool->szidx == size) {
/* Luckily, this pool last contained blocks
* of the same size class, so its header
* and free list are already initialized.
*/
bp = pool->freeblock;
assert(bp != NULL);
pool->freeblock = *(pyblock **)bp;
UNLOCK();
if (use_calloc)
memset(bp, 0, nbytes);
return (void *)bp;
}
/*
* Initialize the pool header, set up the free list to
* contain just the second block, and return the first
* block.
*/
pool->szidx = size;
size = INDEX2SIZE(size);
bp = (pyblock *)pool + POOL_OVERHEAD;
pool->nextoffset = POOL_OVERHEAD + (size << 1);
pool->maxnextoffset = POOL_SIZE - size;
pool->freeblock = bp + size;
if (pool->nextoffset <= pool->maxnextoffset) {
/* There is room for another block. */
pool->freeblock = (pyblock*)pool +
pool->nextoffset;
pool->nextoffset += INDEX2SIZE(size);
*(pyblock **)(pool->freeblock) = NULL;
UNLOCK();
if (use_calloc)
memset(bp, 0, nbytes);
return (void *)bp;
goto success;
}
/* Carve off a new pool. */
assert(_PyRuntime.mem.usable_arenas->nfreepools > 0);
assert(_PyRuntime.mem.usable_arenas->freepools == NULL);
pool = (poolp)_PyRuntime.mem.usable_arenas->pool_address;
assert((pyblock*)pool <= (pyblock*)_PyRuntime.mem.usable_arenas->address +
ARENA_SIZE - POOL_SIZE);
pool->arenaindex = (uint)(_PyRuntime.mem.usable_arenas - _PyRuntime.mem.arenas);
assert(&_PyRuntime.mem.arenas[pool->arenaindex] == _PyRuntime.mem.usable_arenas);
pool->szidx = DUMMY_SIZE_IDX;
_PyRuntime.mem.usable_arenas->pool_address += POOL_SIZE;
--_PyRuntime.mem.usable_arenas->nfreepools;
/* Pool is full, unlink from used pools. */
next = pool->nextpool;
pool = pool->prevpool;
next->prevpool = pool;
pool->nextpool = next;
goto success;
}
/* There isn't a pool of the right size class immediately
* available: use a free pool.
*/
if (_PyRuntime.mem.usable_arenas == NULL) {
/* No arena has a free pool: allocate a new arena. */
#ifdef WITH_MEMORY_LIMITS
if (_PyRuntime.mem.narenas_currently_allocated >= MAX_ARENAS) {
goto failed;
}
#endif
_PyRuntime.mem.usable_arenas = new_arena();
if (_PyRuntime.mem.usable_arenas == NULL) {
goto failed;
}
_PyRuntime.mem.usable_arenas->nextarena =
_PyRuntime.mem.usable_arenas->prevarena = NULL;
}
assert(_PyRuntime.mem.usable_arenas->address != 0);
/* Try to get a cached free pool. */
pool = _PyRuntime.mem.usable_arenas->freepools;
if (pool != NULL) {
/* Unlink from cached pools. */
_PyRuntime.mem.usable_arenas->freepools = pool->nextpool;
/* This arena already had the smallest nfreepools
* value, so decreasing nfreepools doesn't change
* that, and we don't need to rearrange the
* usable_arenas list. However, if the arena has
* become wholly allocated, we need to remove its
* arena_object from usable_arenas.
*/
--_PyRuntime.mem.usable_arenas->nfreepools;
if (_PyRuntime.mem.usable_arenas->nfreepools == 0) {
/* Wholly allocated: remove. */
assert(_PyRuntime.mem.usable_arenas->freepools == NULL);
assert(_PyRuntime.mem.usable_arenas->nextarena == NULL ||
_PyRuntime.mem.usable_arenas->nextarena->prevarena ==
_PyRuntime.mem.usable_arenas);
/* Unlink the arena: it is completely allocated. */
_PyRuntime.mem.usable_arenas = _PyRuntime.mem.usable_arenas->nextarena;
if (_PyRuntime.mem.usable_arenas != NULL) {
_PyRuntime.mem.usable_arenas->prevarena = NULL;
assert(_PyRuntime.mem.usable_arenas->address != 0);
}
}
else {
/* nfreepools > 0: it must be that freepools
* isn't NULL, or that we haven't yet carved
* off all the arena's pools for the first
* time.
*/
assert(_PyRuntime.mem.usable_arenas->freepools != NULL ||
_PyRuntime.mem.usable_arenas->pool_address <=
(pyblock*)_PyRuntime.mem.usable_arenas->address +
ARENA_SIZE - POOL_SIZE);
}
goto init_pool;
init_pool:
/* Frontlink to used pools. */
next = _PyRuntime.mem.usedpools[size + size]; /* == prev */
pool->nextpool = next;
pool->prevpool = next;
next->nextpool = pool;
next->prevpool = pool;
pool->ref.count = 1;
if (pool->szidx == size) {
/* Luckily, this pool last contained blocks
* of the same size class, so its header
* and free list are already initialized.
*/
bp = pool->freeblock;
assert(bp != NULL);
pool->freeblock = *(pyblock **)bp;
goto success;
}
/*
* Initialize the pool header, set up the free list to
* contain just the second block, and return the first
* block.
*/
pool->szidx = size;
size = INDEX2SIZE(size);
bp = (pyblock *)pool + POOL_OVERHEAD;
pool->nextoffset = POOL_OVERHEAD + (size << 1);
pool->maxnextoffset = POOL_SIZE - size;
pool->freeblock = bp + size;
*(pyblock **)(pool->freeblock) = NULL;
goto success;
}
/* The small block allocator ends here. */
redirect:
/* Redirect the original request to the underlying (libc) allocator.
* We jump here on bigger requests, on error in the code above (as a
* last chance to serve the request) or when the max memory limit
* has been reached.
*/
{
void *result;
if (use_calloc)
result = PyMem_RawCalloc(nelem, elsize);
else
result = PyMem_RawMalloc(nbytes);
if (!result)
_PyRuntime.mem.num_allocated_blocks--;
return result;
/* Carve off a new pool. */
assert(_PyRuntime.mem.usable_arenas->nfreepools > 0);
assert(_PyRuntime.mem.usable_arenas->freepools == NULL);
pool = (poolp)_PyRuntime.mem.usable_arenas->pool_address;
assert((pyblock*)pool <= (pyblock*)_PyRuntime.mem.usable_arenas->address +
ARENA_SIZE - POOL_SIZE);
pool->arenaindex = (uint)(_PyRuntime.mem.usable_arenas - _PyRuntime.mem.arenas);
assert(&_PyRuntime.mem.arenas[pool->arenaindex] == _PyRuntime.mem.usable_arenas);
pool->szidx = DUMMY_SIZE_IDX;
_PyRuntime.mem.usable_arenas->pool_address += POOL_SIZE;
--_PyRuntime.mem.usable_arenas->nfreepools;
if (_PyRuntime.mem.usable_arenas->nfreepools == 0) {
assert(_PyRuntime.mem.usable_arenas->nextarena == NULL ||
_PyRuntime.mem.usable_arenas->nextarena->prevarena ==
_PyRuntime.mem.usable_arenas);
/* Unlink the arena: it is completely allocated. */
_PyRuntime.mem.usable_arenas = _PyRuntime.mem.usable_arenas->nextarena;
if (_PyRuntime.mem.usable_arenas != NULL) {
_PyRuntime.mem.usable_arenas->prevarena = NULL;
assert(_PyRuntime.mem.usable_arenas->address != 0);
}
}
goto init_pool;
success:
UNLOCK();
assert(bp != NULL);
*ptr_p = (void *)bp;
return 1;
failed:
UNLOCK();
return 0;
}
static void *
_PyObject_Malloc(void *ctx, size_t nbytes)
{
return _PyObject_Alloc(0, ctx, 1, nbytes);
void* ptr;
if (pymalloc_alloc(ctx, &ptr, nbytes)) {
_PyRuntime.mem.num_allocated_blocks++;
return ptr;
}
ptr = PyMem_RawMalloc(nbytes);
if (ptr != NULL) {
_PyRuntime.mem.num_allocated_blocks++;
}
return ptr;
}
static void *
_PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
{
return _PyObject_Alloc(1, ctx, nelem, elsize);
void* ptr;
assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
size_t nbytes = nelem * elsize;
if (pymalloc_alloc(ctx, &ptr, nbytes)) {
memset(ptr, 0, nbytes);
_PyRuntime.mem.num_allocated_blocks++;
return ptr;
}
ptr = PyMem_RawCalloc(nelem, elsize);
if (ptr != NULL) {
_PyRuntime.mem.num_allocated_blocks++;
}
return ptr;
}
/* free */
static void
_PyObject_Free(void *ctx, void *p)
/* Free a memory block allocated by pymalloc_alloc().
Return 1 if it was freed.
Return 0 if the block was not allocated by pymalloc_alloc(). */
static int
pymalloc_free(void *ctx, void *p)
{
poolp pool;
pyblock *lastfree;
poolp next, prev;
uint size;
if (p == NULL) /* free(NULL) has no effect */
return;
_PyRuntime.mem.num_allocated_blocks--;
assert(p != NULL);
#ifdef WITH_VALGRIND
if (UNLIKELY(running_on_valgrind > 0))
goto redirect;
if (UNLIKELY(running_on_valgrind > 0)) {
return 0;
}
#endif
pool = POOL_ADDR(p);
if (address_in_range(p, pool)) {
/* We allocated this address. */
LOCK();
/* Link p to the start of the pool's freeblock list. Since
* the pool had at least the p block outstanding, the pool
* wasn't empty (so it's already in a usedpools[] list, or
* was full and is in no list -- it's not in the freeblocks
* list in any case).
*/
assert(pool->ref.count > 0); /* else it was empty */
*(pyblock **)p = lastfree = pool->freeblock;
pool->freeblock = (pyblock *)p;
if (lastfree) {
struct arena_object* ao;
uint nf; /* ao->nfreepools */
/* freeblock wasn't NULL, so the pool wasn't full,
* and the pool is in a usedpools[] list.
*/
if (--pool->ref.count != 0) {
/* pool isn't empty: leave it in usedpools */
UNLOCK();
return;
}
/* Pool is now empty: unlink from usedpools, and
* link to the front of freepools. This ensures that
* previously freed pools will be allocated later
* (being not referenced, they are perhaps paged out).
*/
next = pool->nextpool;
prev = pool->prevpool;
next->prevpool = prev;
prev->nextpool = next;
/* Link the pool to freepools. This is a singly-linked
* list, and pool->prevpool isn't used there.
*/
ao = &_PyRuntime.mem.arenas[pool->arenaindex];
pool->nextpool = ao->freepools;
ao->freepools = pool;
nf = ++ao->nfreepools;
/* All the rest is arena management. We just freed
* a pool, and there are 4 cases for arena mgmt:
* 1. If all the pools are free, return the arena to
* the system free().
* 2. If this is the only free pool in the arena,
* add the arena back to the `usable_arenas` list.
* 3. If the "next" arena has a smaller count of free
* pools, we have to "slide this arena right" to
* restore that usable_arenas is sorted in order of
* nfreepools.
* 4. Else there's nothing more to do.
*/
if (nf == ao->ntotalpools) {
/* Case 1. First unlink ao from usable_arenas.
*/
assert(ao->prevarena == NULL ||
ao->prevarena->address != 0);
assert(ao ->nextarena == NULL ||
ao->nextarena->address != 0);
/* Fix the pointer in the prevarena, or the
* usable_arenas pointer.
*/
if (ao->prevarena == NULL) {
_PyRuntime.mem.usable_arenas = ao->nextarena;
assert(_PyRuntime.mem.usable_arenas == NULL ||
_PyRuntime.mem.usable_arenas->address != 0);
}
else {
assert(ao->prevarena->nextarena == ao);
ao->prevarena->nextarena =
ao->nextarena;
}
/* Fix the pointer in the nextarena. */
if (ao->nextarena != NULL) {
assert(ao->nextarena->prevarena == ao);
ao->nextarena->prevarena =
ao->prevarena;
}
/* Record that this arena_object slot is
* available to be reused.
*/
ao->nextarena = _PyRuntime.mem.unused_arena_objects;
_PyRuntime.mem.unused_arena_objects = ao;
/* Free the entire arena. */
_PyRuntime.obj.allocator_arenas.free(_PyRuntime.obj.allocator_arenas.ctx,
(void *)ao->address, ARENA_SIZE);
ao->address = 0; /* mark unassociated */
--_PyRuntime.mem.narenas_currently_allocated;
UNLOCK();
return;
}
if (nf == 1) {
/* Case 2. Put ao at the head of
* usable_arenas. Note that because
* ao->nfreepools was 0 before, ao isn't
* currently on the usable_arenas list.
*/
ao->nextarena = _PyRuntime.mem.usable_arenas;
ao->prevarena = NULL;
if (_PyRuntime.mem.usable_arenas)
_PyRuntime.mem.usable_arenas->prevarena = ao;
_PyRuntime.mem.usable_arenas = ao;
assert(_PyRuntime.mem.usable_arenas->address != 0);
UNLOCK();
return;
}
/* If this arena is now out of order, we need to keep
* the list sorted. The list is kept sorted so that
* the "most full" arenas are used first, which allows
* the nearly empty arenas to be completely freed. In
* a few un-scientific tests, it seems like this
* approach allowed a lot more memory to be freed.
*/
if (ao->nextarena == NULL ||
nf <= ao->nextarena->nfreepools) {
/* Case 4. Nothing to do. */
UNLOCK();
return;
}
/* Case 3: We have to move the arena towards the end
* of the list, because it has more free pools than
* the arena to its right.
* First unlink ao from usable_arenas.
*/
if (ao->prevarena != NULL) {
/* ao isn't at the head of the list */
assert(ao->prevarena->nextarena == ao);
ao->prevarena->nextarena = ao->nextarena;
}
else {
/* ao is at the head of the list */
assert(_PyRuntime.mem.usable_arenas == ao);
_PyRuntime.mem.usable_arenas = ao->nextarena;
}
ao->nextarena->prevarena = ao->prevarena;
if (!address_in_range(p, pool)) {
return 0;
}
/* We allocated this address. */
/* Locate the new insertion point by iterating over
* the list, using our nextarena pointer.
*/
while (ao->nextarena != NULL &&
nf > ao->nextarena->nfreepools) {
ao->prevarena = ao->nextarena;
ao->nextarena = ao->nextarena->nextarena;
}
LOCK();
/* Insert ao at this point. */
assert(ao->nextarena == NULL ||
ao->prevarena == ao->nextarena->prevarena);
assert(ao->prevarena->nextarena == ao->nextarena);
ao->prevarena->nextarena = ao;
if (ao->nextarena != NULL)
ao->nextarena->prevarena = ao;
/* Verify that the swaps worked. */
assert(ao->nextarena == NULL ||
nf <= ao->nextarena->nfreepools);
assert(ao->prevarena == NULL ||
nf > ao->prevarena->nfreepools);
assert(ao->nextarena == NULL ||
ao->nextarena->prevarena == ao);
assert((_PyRuntime.mem.usable_arenas == ao &&
ao->prevarena == NULL) ||
ao->prevarena->nextarena == ao);
UNLOCK();
return;
}
/* Link p to the start of the pool's freeblock list. Since
* the pool had at least the p block outstanding, the pool
* wasn't empty (so it's already in a usedpools[] list, or
* was full and is in no list -- it's not in the freeblocks
* list in any case).
*/
assert(pool->ref.count > 0); /* else it was empty */
*(pyblock **)p = lastfree = pool->freeblock;
pool->freeblock = (pyblock *)p;
if (!lastfree) {
/* Pool was full, so doesn't currently live in any list:
* link it to the front of the appropriate usedpools[] list.
* This mimics LRU pool usage for new allocations and
@@ -1197,93 +1048,274 @@ _PyObject_Free(void *ctx, void *p)
size = pool->szidx;
next = _PyRuntime.mem.usedpools[size + size];
prev = next->prevpool;
/* insert pool before next: prev <-> pool <-> next */
pool->nextpool = next;
pool->prevpool = prev;
next->prevpool = pool;
prev->nextpool = pool;
UNLOCK();
goto success;
}
struct arena_object* ao;
uint nf; /* ao->nfreepools */
/* freeblock wasn't NULL, so the pool wasn't full,
* and the pool is in a usedpools[] list.
*/
if (--pool->ref.count != 0) {
/* pool isn't empty: leave it in usedpools */
goto success;
}
/* Pool is now empty: unlink from usedpools, and
* link to the front of freepools. This ensures that
* previously freed pools will be allocated later
* (being not referenced, they are perhaps paged out).
*/
next = pool->nextpool;
prev = pool->prevpool;
next->prevpool = prev;
prev->nextpool = next;
/* Link the pool to freepools. This is a singly-linked
* list, and pool->prevpool isn't used there.
*/
ao = &_PyRuntime.mem.arenas[pool->arenaindex];
pool->nextpool = ao->freepools;
ao->freepools = pool;
nf = ++ao->nfreepools;
/* All the rest is arena management. We just freed
* a pool, and there are 4 cases for arena mgmt:
* 1. If all the pools are free, return the arena to
* the system free().
* 2. If this is the only free pool in the arena,
* add the arena back to the `usable_arenas` list.
* 3. If the "next" arena has a smaller count of free
* pools, we have to "slide this arena right" to
* restore that usable_arenas is sorted in order of
* nfreepools.
* 4. Else there's nothing more to do.
*/
if (nf == ao->ntotalpools) {
/* Case 1. First unlink ao from usable_arenas.
*/
assert(ao->prevarena == NULL ||
ao->prevarena->address != 0);
assert(ao ->nextarena == NULL ||
ao->nextarena->address != 0);
/* Fix the pointer in the prevarena, or the
* usable_arenas pointer.
*/
if (ao->prevarena == NULL) {
_PyRuntime.mem.usable_arenas = ao->nextarena;
assert(_PyRuntime.mem.usable_arenas == NULL ||
_PyRuntime.mem.usable_arenas->address != 0);
}
else {
assert(ao->prevarena->nextarena == ao);
ao->prevarena->nextarena =
ao->nextarena;
}
/* Fix the pointer in the nextarena. */
if (ao->nextarena != NULL) {
assert(ao->nextarena->prevarena == ao);
ao->nextarena->prevarena =
ao->prevarena;
}
/* Record that this arena_object slot is
* available to be reused.
*/
ao->nextarena = _PyRuntime.mem.unused_arena_objects;
_PyRuntime.mem.unused_arena_objects = ao;
/* Free the entire arena. */
_PyRuntime.obj.allocator_arenas.free(_PyRuntime.obj.allocator_arenas.ctx,
(void *)ao->address, ARENA_SIZE);
ao->address = 0; /* mark unassociated */
--_PyRuntime.mem.narenas_currently_allocated;
goto success;
}
if (nf == 1) {
/* Case 2. Put ao at the head of
* usable_arenas. Note that because
* ao->nfreepools was 0 before, ao isn't
* currently on the usable_arenas list.
*/
ao->nextarena = _PyRuntime.mem.usable_arenas;
ao->prevarena = NULL;
if (_PyRuntime.mem.usable_arenas)
_PyRuntime.mem.usable_arenas->prevarena = ao;
_PyRuntime.mem.usable_arenas = ao;
assert(_PyRuntime.mem.usable_arenas->address != 0);
goto success;
}
/* If this arena is now out of order, we need to keep
* the list sorted. The list is kept sorted so that
* the "most full" arenas are used first, which allows
* the nearly empty arenas to be completely freed. In
* a few un-scientific tests, it seems like this
* approach allowed a lot more memory to be freed.
*/
if (ao->nextarena == NULL ||
nf <= ao->nextarena->nfreepools) {
/* Case 4. Nothing to do. */
goto success;
}
/* Case 3: We have to move the arena towards the end
* of the list, because it has more free pools than
* the arena to its right.
* First unlink ao from usable_arenas.
*/
if (ao->prevarena != NULL) {
/* ao isn't at the head of the list */
assert(ao->prevarena->nextarena == ao);
ao->prevarena->nextarena = ao->nextarena;
}
else {
/* ao is at the head of the list */
assert(_PyRuntime.mem.usable_arenas == ao);
_PyRuntime.mem.usable_arenas = ao->nextarena;
}
ao->nextarena->prevarena = ao->prevarena;
/* Locate the new insertion point by iterating over
* the list, using our nextarena pointer.
*/
while (ao->nextarena != NULL && nf > ao->nextarena->nfreepools) {
ao->prevarena = ao->nextarena;
ao->nextarena = ao->nextarena->nextarena;
}
/* Insert ao at this point. */
assert(ao->nextarena == NULL || ao->prevarena == ao->nextarena->prevarena);
assert(ao->prevarena->nextarena == ao->nextarena);
ao->prevarena->nextarena = ao;
if (ao->nextarena != NULL) {
ao->nextarena->prevarena = ao;
}
/* Verify that the swaps worked. */
assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
assert((_PyRuntime.mem.usable_arenas == ao && ao->prevarena == NULL)
|| ao->prevarena->nextarena == ao);
goto success;
success:
UNLOCK();
return 1;
}
static void
_PyObject_Free(void *ctx, void *p)
{
/* PyObject_Free(NULL) has no effect */
if (p == NULL) {
return;
}
#ifdef WITH_VALGRIND
redirect:
#endif
/* We didn't allocate this address. */
PyMem_RawFree(p);
_PyRuntime.mem.num_allocated_blocks--;
if (!pymalloc_free(ctx, p)) {
/* pymalloc didn't allocate this address */
PyMem_RawFree(p);
}
}
/* realloc. If p is NULL, this acts like malloc(nbytes). Else if nbytes==0,
* then as the Python docs promise, we do not treat this like free(p), and
* return a non-NULL result.
*/
static void *
_PyObject_Realloc(void *ctx, void *p, size_t nbytes)
/* pymalloc realloc.
If nbytes==0, then as the Python docs promise, we do not treat this like
free(p), and return a non-NULL result.
Return 1 if pymalloc reallocated memory and wrote the new pointer into
newptr_p.
Return 0 if pymalloc didn't allocate p. */
static int
pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes)
{
void *bp;
poolp pool;
size_t size;
if (p == NULL)
return _PyObject_Alloc(0, ctx, 1, nbytes);
assert(p != NULL);
#ifdef WITH_VALGRIND
/* Treat running_on_valgrind == -1 the same as 0 */
if (UNLIKELY(running_on_valgrind > 0))
goto redirect;
if (UNLIKELY(running_on_valgrind > 0)) {
return 0;
}
#endif
pool = POOL_ADDR(p);
if (address_in_range(p, pool)) {
/* We're in charge of this block */
size = INDEX2SIZE(pool->szidx);
if (nbytes <= size) {
/* The block is staying the same or shrinking. If
* it's shrinking, there's a tradeoff: it costs
* cycles to copy the block to a smaller size class,
* but it wastes memory not to copy it. The
* compromise here is to copy on shrink only if at
* least 25% of size can be shaved off.
*/
if (4 * nbytes > 3 * size) {
/* It's the same,
* or shrinking and new/old > 3/4.
*/
return p;
}
size = nbytes;
}
bp = _PyObject_Alloc(0, ctx, 1, nbytes);
if (bp != NULL) {
memcpy(bp, p, size);
_PyObject_Free(ctx, p);
if (!address_in_range(p, pool)) {
/* pymalloc is not managing this block.
If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
over this block. However, if we do, we need to copy the valid data
from the C-managed block to one of our blocks, and there's no
portable way to know how much of the memory space starting at p is
valid.
As bug 1185883 pointed out the hard way, it's possible that the
C-managed block is "at the end" of allocated VM space, so that a
memory fault can occur if we try to copy nbytes bytes starting at p.
Instead we punt: let C continue to manage this block. */
return 0;
}
/* pymalloc is in charge of this block */
size = INDEX2SIZE(pool->szidx);
if (nbytes <= size) {
/* The block is staying the same or shrinking.
If it's shrinking, there's a tradeoff: it costs cycles to copy the
block to a smaller size class, but it wastes memory not to copy it.
The compromise here is to copy on shrink only if at least 25% of
size can be shaved off. */
if (4 * nbytes > 3 * size) {
/* It's the same, or shrinking and new/old > 3/4. */
*newptr_p = p;
return 1;
}
return bp;
size = nbytes;
}
#ifdef WITH_VALGRIND
redirect:
#endif
/* We're not managing this block. If nbytes <=
* SMALL_REQUEST_THRESHOLD, it's tempting to try to take over this
* block. However, if we do, we need to copy the valid data from
* the C-managed block to one of our blocks, and there's no portable
* way to know how much of the memory space starting at p is valid.
* As bug 1185883 pointed out the hard way, it's possible that the
* C-managed block is "at the end" of allocated VM space, so that
* a memory fault can occur if we try to copy nbytes bytes starting
* at p. Instead we punt: let C continue to manage this block.
*/
if (nbytes)
return PyMem_RawRealloc(p, nbytes);
/* C doesn't define the result of realloc(p, 0) (it may or may not
* return NULL then), but Python's docs promise that nbytes==0 never
* returns NULL. We don't pass 0 to realloc(), to avoid that endcase
* to begin with. Even then, we can't be sure that realloc() won't
* return NULL.
*/
bp = PyMem_RawRealloc(p, 1);
return bp ? bp : p;
bp = _PyObject_Malloc(ctx, nbytes);
if (bp != NULL) {
memcpy(bp, p, size);
_PyObject_Free(ctx, p);
}
*newptr_p = bp;
return 1;
}
static void *
_PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
{
void *ptr2;
if (ptr == NULL) {
return _PyObject_Malloc(ctx, nbytes);
}
if (pymalloc_realloc(ctx, &ptr2, ptr, nbytes)) {
return ptr2;
}
return PyMem_RawRealloc(ptr, nbytes);
}
#else /* ! WITH_PYMALLOC */
@@ -1386,37 +1418,53 @@ static void *
_PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
{
debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
uint8_t *p; /* base address of malloc'ed block */
uint8_t *tail; /* p + 2*SST + nbytes == pointer to tail pad bytes */
size_t total; /* nbytes + 4*SST */
uint8_t *p; /* base address of malloc'ed pad block */
uint8_t *data; /* p + 2*SST == pointer to data bytes */
uint8_t *tail; /* data + nbytes == pointer to tail pad bytes */
size_t total; /* 2 * SST + nbytes + 2 * SST */
bumpserialno();
total = nbytes + 4*SST;
if (nbytes > PY_SSIZE_T_MAX - 4*SST)
/* overflow: can't represent total as a Py_ssize_t */
if (nbytes > (size_t)PY_SSIZE_T_MAX - 4 * SST) {
/* integer overflow: can't represent total as a Py_ssize_t */
return NULL;
}
total = nbytes + 4 * SST;
if (use_calloc)
/* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
* ^--- p ^--- data ^--- tail
S: nbytes stored as size_t
I: API identifier (1 byte)
F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
C: Clean bytes used later to store actual data
N: Serial number stored as size_t */
if (use_calloc) {
p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
else
}
else {
p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
if (p == NULL)
}
if (p == NULL) {
return NULL;
}
data = p + 2*SST;
bumpserialno();
/* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
write_size_t(p, nbytes);
p[SST] = (uint8_t)api->api_id;
memset(p + SST + 1, FORBIDDENBYTE, SST-1);
if (nbytes > 0 && !use_calloc)
memset(p + 2*SST, CLEANBYTE, nbytes);
if (nbytes > 0 && !use_calloc) {
memset(data, CLEANBYTE, nbytes);
}
/* at tail, write pad (SST bytes) and serialno (SST bytes) */
tail = p + 2*SST + nbytes;
tail = data + nbytes;
memset(tail, FORBIDDENBYTE, SST);
write_size_t(tail + SST, _PyRuntime.mem.serialno);
return p + 2*SST;
return data;
}
static void *
@@ -1429,11 +1477,12 @@ static void *
_PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
{
size_t nbytes;
assert(elsize == 0 || nelem <= PY_SSIZE_T_MAX / elsize);
assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
nbytes = nelem * elsize;
return _PyMem_DebugRawAlloc(1, ctx, nbytes);
}
/* The debug free first checks the 2*SST bytes on each end for sanity (in
particular, that the FORBIDDENBYTEs with the api ID are still intact).
Then fills the original bytes with DEADBYTE.
@@ -1442,63 +1491,73 @@ _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
static void
_PyMem_DebugRawFree(void *ctx, void *p)
{
/* PyMem_Free(NULL) has no effect */
if (p == NULL) {
return;
}
debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
uint8_t *q = (uint8_t *)p - 2*SST; /* address returned from malloc */
size_t nbytes;
if (p == NULL)
return;
_PyMem_DebugCheckAddress(api->api_id, p);
nbytes = read_size_t(q);
nbytes += 4*SST;
if (nbytes > 0)
memset(q, DEADBYTE, nbytes);
nbytes += 4 * SST;
memset(q, DEADBYTE, nbytes);
api->alloc.free(api->alloc.ctx, q);
}
static void *
_PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
{
if (p == NULL) {
return _PyMem_DebugRawAlloc(0, ctx, nbytes);
}
debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
uint8_t *q = (uint8_t *)p;
uint8_t *tail;
size_t total; /* nbytes + 4*SST */
uint8_t *q; /* base address of malloc'ed pad block */
uint8_t *data; /* p + 2*SST == pointer to data bytes */
uint8_t *tail; /* data + nbytes == pointer to tail pad bytes */
size_t total; /* 2 * SST + nbytes + 2 * SST */
size_t original_nbytes;
int i;
if (p == NULL)
return _PyMem_DebugRawAlloc(0, ctx, nbytes);
_PyMem_DebugCheckAddress(api->api_id, p);
bumpserialno();
q = (uint8_t *)p;
original_nbytes = read_size_t(q - 2*SST);
total = nbytes + 4*SST;
if (nbytes > PY_SSIZE_T_MAX - 4*SST)
/* overflow: can't represent total as a Py_ssize_t */
if (nbytes > (size_t)PY_SSIZE_T_MAX - 4*SST) {
/* integer overflow: can't represent total as a Py_ssize_t */
return NULL;
}
total = nbytes + 4*SST;
/* Resize and add decorations. */
q = (uint8_t *)api->alloc.realloc(api->alloc.ctx, q - 2*SST, total);
if (q == NULL)
if (q == NULL) {
return NULL;
}
bumpserialno();
write_size_t(q, nbytes);
assert(q[SST] == (uint8_t)api->api_id);
for (i = 1; i < SST; ++i)
for (i = 1; i < SST; ++i) {
assert(q[SST + i] == FORBIDDENBYTE);
q += 2*SST;
}
data = q + 2*SST;
tail = q + nbytes;
tail = data + nbytes;
memset(tail, FORBIDDENBYTE, SST);
write_size_t(tail + SST, _PyRuntime.mem.serialno);
if (nbytes > original_nbytes) {
/* growing: mark new extra memory clean */
memset(q + original_nbytes, CLEANBYTE,
memset(data + original_nbytes, CLEANBYTE,
nbytes - original_nbytes);
}
return q;
return data;
}
static void
@@ -1523,6 +1582,7 @@ _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
}
static void
_PyMem_DebugFree(void *ctx, void *ptr)
{
@@ -1530,6 +1590,7 @@ _PyMem_DebugFree(void *ctx, void *ptr)
_PyMem_DebugRawFree(ctx, ptr);
}
static void *
_PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
{
......