Unverified commit 9ed83c40 authored by Victor Stinner, committed by GitHub

bpo-18835: Cleanup pymalloc (#4200)

Cleanup pymalloc:

* Rename _PyObject_Alloc() to pymalloc_alloc()
* Rename _PyObject_FreeImpl() to pymalloc_free()
* Rename _PyObject_Realloc() to pymalloc_realloc()
* pymalloc_alloc() and pymalloc_realloc() no longer fall back to the raw
  allocator; the caller must now do it (see the sketch after this list)
* Add "success" and "failed" labels to pymalloc_alloc() and
  pymalloc_free()
* pymalloc_alloc() and pymalloc_free() no longer update
  num_allocated_blocks: it must now be done by the caller
* _PyObject_Calloc() is now responsible for filling the memory block
  allocated by pymalloc with zeros
* Simplify pymalloc_alloc() prototype
* _PyObject_Realloc() now calls _PyObject_Malloc() rather than calling
  pymalloc_alloc() directly
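
To make the new division of labor concrete, here is a minimal sketch of the
caller-side pattern (a sketch only: object_malloc_sketch is a hypothetical
name; the real wrapper in this commit is _PyObject_Malloc(), shown in the
diff below):

static void *
object_malloc_sketch(void *ctx, size_t nbytes)
{
    void *ptr;

    /* pymalloc_alloc() now only reports success or failure... */
    if (pymalloc_alloc(ctx, &ptr, nbytes)) {
        _PyRuntime.mem.num_allocated_blocks++;
        return ptr;
    }

    /* ...so the raw-allocator fallback and the block accounting both
       live in the caller. */
    ptr = PyMem_RawMalloc(nbytes);
    if (ptr != NULL) {
        _PyRuntime.mem.num_allocated_blocks++;
    }
    return ptr;
}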

_PyMem_DebugRawAlloc() and _PyMem_DebugRawRealloc():

* document the layout of a memory block
* don't increase the serial number if the allocation failed
* check for integer overflow before computing the total size
* add a 'data' variable to make the code easier to follow (a layout sketch
  follows below)
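
The layout and the overflow check can be illustrated with a standalone sketch
(assumptions: debug_wrap_sketch and write_size_t_sketch are hypothetical
stand-ins for _PyMem_DebugRawAlloc() and obmalloc.c's write_size_t(); the
marker byte value and the SIZE_MAX / 2 bound are stand-ins as well):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define SST sizeof(size_t)
#define FORBIDDENBYTE 0xFB   /* stand-in marker byte; obmalloc.c defines its own */

/* Simplified stand-in for obmalloc.c's write_size_t(). */
static void write_size_t_sketch(void *p, size_t n)
{
    memcpy(p, &n, SST);
}

/* Layout documented by this commit: [SSSS IFFF CCCC...CCCC FFFF NNNN]
                                      ^--- p    ^--- data    ^--- tail
   S: requested size, I: API id, F: forbidden bytes, C: caller data,
   N: serial number. */
static void *
debug_wrap_sketch(size_t nbytes, uint8_t api_id, size_t serialno)
{
    uint8_t *p, *data, *tail;

    if (nbytes > SIZE_MAX / 2 - 4 * SST) {
        /* overflow check done before computing the total size
           (SIZE_MAX / 2 plays the role of PY_SSIZE_T_MAX here) */
        return NULL;
    }
    p = malloc(nbytes + 4 * SST);
    if (p == NULL) {
        return NULL;                         /* serial number is not bumped on failure */
    }
    data = p + 2 * SST;
    tail = data + nbytes;
    write_size_t_sketch(p, nbytes);              /* S: size header */
    p[SST] = api_id;                             /* I: API identifier */
    memset(p + SST + 1, FORBIDDENBYTE, SST - 1); /* F: leading pad */
    memset(tail, FORBIDDENBYTE, SST);            /* F: trailing pad */
    write_size_t_sketch(tail + SST, serialno);   /* N: serial number */
    return data;                                 /* the caller only ever sees 'data' */
}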

test_setallocators() in _testcapimodule.c now also tests that the allocator context is passed through correctly.
parent ec2cbdd1
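
For context, here is a minimal sketch of the hooking pattern the test
exercises (hook_t and the *_sketch names are illustrative, not the test's
actual identifiers; only the raw domain is shown):

#include <Python.h>

typedef struct {
    PyMemAllocatorEx orig;   /* previous allocator, restored at the end */
    void *seen_ctx;          /* ctx value received by the malloc hook */
    size_t malloc_size;
} hook_t;

static void *hook_malloc_sketch(void *ctx, size_t size)
{
    hook_t *h = (hook_t *)ctx;
    h->seen_ctx = ctx;       /* must equal &hook: this is what the test now checks */
    h->malloc_size = size;
    return h->orig.malloc(h->orig.ctx, size);
}

/* Pass-through hooks so the installed allocator stays fully functional. */
static void *hook_calloc_sketch(void *ctx, size_t ne, size_t sz)
{ hook_t *h = (hook_t *)ctx; return h->orig.calloc(h->orig.ctx, ne, sz); }
static void *hook_realloc_sketch(void *ctx, void *p, size_t sz)
{ hook_t *h = (hook_t *)ctx; return h->orig.realloc(h->orig.ctx, p, sz); }
static void hook_free_sketch(void *ctx, void *p)
{ hook_t *h = (hook_t *)ctx; h->orig.free(h->orig.ctx, p); }

/* Usage sketch: returns 0 if the context was passed through to the hook. */
static int check_ctx_sketch(void)
{
    hook_t hook;
    PyMemAllocatorEx alloc = {&hook, hook_malloc_sketch, hook_calloc_sketch,
                              hook_realloc_sketch, hook_free_sketch};

    hook.seen_ctx = NULL;
    PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &hook.orig);
    PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);

    void *p = PyMem_RawMalloc(42);

    PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &hook.orig);   /* restore before freeing */
    PyMem_RawFree(p);
    return (p != NULL && hook.seen_ctx == &hook) ? 0 : -1;
}
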
@@ -3273,34 +3273,39 @@ typedef struct {
void *realloc_ptr;
size_t realloc_new_size;
void *free_ptr;
void *ctx;
} alloc_hook_t;
static void* hook_malloc (void* ctx, size_t size)
static void* hook_malloc(void* ctx, size_t size)
{
alloc_hook_t *hook = (alloc_hook_t *)ctx;
hook->ctx = ctx;
hook->malloc_size = size;
return hook->alloc.malloc(hook->alloc.ctx, size);
}
static void* hook_calloc (void* ctx, size_t nelem, size_t elsize)
static void* hook_calloc(void* ctx, size_t nelem, size_t elsize)
{
alloc_hook_t *hook = (alloc_hook_t *)ctx;
hook->ctx = ctx;
hook->calloc_nelem = nelem;
hook->calloc_elsize = elsize;
return hook->alloc.calloc(hook->alloc.ctx, nelem, elsize);
}
static void* hook_realloc (void* ctx, void* ptr, size_t new_size)
static void* hook_realloc(void* ctx, void* ptr, size_t new_size)
{
alloc_hook_t *hook = (alloc_hook_t *)ctx;
hook->ctx = ctx;
hook->realloc_ptr = ptr;
hook->realloc_new_size = new_size;
return hook->alloc.realloc(hook->alloc.ctx, ptr, new_size);
}
static void hook_free (void *ctx, void *ptr)
static void hook_free(void *ctx, void *ptr)
{
alloc_hook_t *hook = (alloc_hook_t *)ctx;
hook->ctx = ctx;
hook->free_ptr = ptr;
hook->alloc.free(hook->alloc.ctx, ptr);
}
@@ -3325,7 +3330,9 @@ test_setallocators(PyMemAllocatorDomain domain)
PyMem_GetAllocator(domain, &hook.alloc);
PyMem_SetAllocator(domain, &alloc);
/* malloc, realloc, free */
size = 42;
hook.ctx = NULL;
switch(domain)
{
case PYMEM_DOMAIN_RAW: ptr = PyMem_RawMalloc(size); break;
@@ -3334,11 +3341,18 @@ test_setallocators(PyMemAllocatorDomain domain)
default: ptr = NULL; break;
}
#define CHECK_CTX(FUNC) \
if (hook.ctx != &hook) { \
error_msg = FUNC " wrong context"; \
goto fail; \
} \
hook.ctx = NULL; /* reset for next check */
if (ptr == NULL) {
error_msg = "malloc failed";
goto fail;
}
CHECK_CTX("malloc");
if (hook.malloc_size != size) {
error_msg = "malloc invalid size";
goto fail;
@@ -3357,7 +3371,7 @@ test_setallocators(PyMemAllocatorDomain domain)
error_msg = "realloc failed";
goto fail;
}
CHECK_CTX("realloc");
if (hook.realloc_ptr != ptr
|| hook.realloc_new_size != size2) {
error_msg = "realloc invalid parameters";
@@ -3371,11 +3385,13 @@ test_setallocators(PyMemAllocatorDomain domain)
case PYMEM_DOMAIN_OBJ: PyObject_Free(ptr2); break;
}
CHECK_CTX("free");
if (hook.free_ptr != ptr2) {
error_msg = "free invalid pointer";
goto fail;
}
/* calloc, free */
nelem = 2;
elsize = 5;
switch(domain)
@@ -3390,12 +3406,13 @@ test_setallocators(PyMemAllocatorDomain domain)
error_msg = "calloc failed";
goto fail;
}
CHECK_CTX("calloc");
if (hook.calloc_nelem != nelem || hook.calloc_elsize != elsize) {
error_msg = "calloc invalid nelem or elsize";
goto fail;
}
hook.free_ptr = NULL;
switch(domain)
{
case PYMEM_DOMAIN_RAW: PyMem_RawFree(ptr); break;
@@ -3403,6 +3420,12 @@ test_setallocators(PyMemAllocatorDomain domain)
case PYMEM_DOMAIN_OBJ: PyObject_Free(ptr); break;
}
CHECK_CTX("calloc free");
if (hook.free_ptr != ptr) {
error_msg = "calloc free invalid pointer";
goto fail;
}
Py_INCREF(Py_None);
res = Py_None;
goto finally;
@@ -3413,6 +3436,8 @@ fail:
finally:
PyMem_SetAllocator(domain, &hook.alloc);
return res;
#undef CHECK_CTX
}
static PyObject *
......
@@ -18,7 +18,7 @@ extern void _PyMem_DumpTraceback(int fd, const void *ptr);
static void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
static void _PyMem_DebugRawFree(void *ctx, void *p);
static void _PyMem_DebugRawFree(void *ctx, void *ptr);
static void* _PyMem_DebugMalloc(void *ctx, size_t size);
static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
@@ -444,6 +444,7 @@ PyMem_RawFree(void *ptr)
_PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
}
void *
PyMem_Malloc(size_t size)
{
@@ -477,6 +478,7 @@ PyMem_Free(void *ptr)
_PyMem.free(_PyMem.ctx, ptr);
}
char *
_PyMem_RawStrdup(const char *str)
{
@@ -556,6 +558,7 @@ PyObject_Free(void *ptr)
static int running_on_valgrind = -1;
#endif
Py_ssize_t
_Py_GetAllocatedBlocks(void)
{
@@ -661,6 +664,7 @@ new_arena(void)
return arenaobj;
}
/*
address_in_range(P, POOL)
@@ -750,442 +754,289 @@ address_in_range(void *p, poolp pool)
_PyRuntime.mem.arenas[arenaindex].address != 0;
}
/*==========================================================================*/
/* malloc. Note that nbytes==0 tries to return a non-NULL pointer, distinct
* from all other currently live pointers. This may not be possible.
*/
/* pymalloc allocator
/*
* The basic blocks are ordered by decreasing execution frequency,
* which minimizes the number of jumps in the most common cases,
* improves branching prediction and instruction scheduling (small
* block allocations typically result in a couple of instructions).
* Unless the optimizer reorders everything, being too smart...
*/
The basic blocks are ordered by decreasing execution frequency,
which minimizes the number of jumps in the most common cases,
improves branching prediction and instruction scheduling (small
block allocations typically result in a couple of instructions).
Unless the optimizer reorders everything, being too smart...
static void *
_PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
Return 1 if pymalloc allocated memory and wrote the pointer into *ptr_p.
Return 0 if pymalloc failed to allocate the memory block: on bigger
requests, on error in the code below (as a last chance to serve the request)
or when the max memory limit has been reached. */
static int
pymalloc_alloc(void *ctx, void **ptr_p, size_t nbytes)
{
size_t nbytes;
pyblock *bp;
poolp pool;
poolp next;
uint size;
_PyRuntime.mem.num_allocated_blocks++;
assert(elsize == 0 || nelem <= PY_SSIZE_T_MAX / elsize);
nbytes = nelem * elsize;
#ifdef WITH_VALGRIND
if (UNLIKELY(running_on_valgrind == -1))
if (UNLIKELY(running_on_valgrind == -1)) {
running_on_valgrind = RUNNING_ON_VALGRIND;
if (UNLIKELY(running_on_valgrind))
goto redirect;
}
if (UNLIKELY(running_on_valgrind)) {
return 0;
}
#endif
if (nelem == 0 || elsize == 0)
goto redirect;
if (nbytes == 0) {
return 0;
}
if (nbytes > SMALL_REQUEST_THRESHOLD) {
return 0;
}
if ((nbytes - 1) < SMALL_REQUEST_THRESHOLD) {
LOCK();
LOCK();
/*
* Most frequent paths first
*/
size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
pool = _PyRuntime.mem.usedpools[size + size];
if (pool != pool->nextpool) {
/*
* Most frequent paths first
* There is a used pool for this size class.
* Pick up the head block of its free list.
*/
size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
pool = _PyRuntime.mem.usedpools[size + size];
if (pool != pool->nextpool) {
/*
* There is a used pool for this size class.
* Pick up the head block of its free list.
*/
++pool->ref.count;
bp = pool->freeblock;
assert(bp != NULL);
if ((pool->freeblock = *(pyblock **)bp) != NULL) {
UNLOCK();
if (use_calloc)
memset(bp, 0, nbytes);
return (void *)bp;
}
/*
* Reached the end of the free list, try to extend it.
*/
if (pool->nextoffset <= pool->maxnextoffset) {
/* There is room for another block. */
pool->freeblock = (pyblock*)pool +
pool->nextoffset;
pool->nextoffset += INDEX2SIZE(size);
*(pyblock **)(pool->freeblock) = NULL;
UNLOCK();
if (use_calloc)
memset(bp, 0, nbytes);
return (void *)bp;
}
/* Pool is full, unlink from used pools. */
next = pool->nextpool;
pool = pool->prevpool;
next->prevpool = pool;
pool->nextpool = next;
UNLOCK();
if (use_calloc)
memset(bp, 0, nbytes);
return (void *)bp;
++pool->ref.count;
bp = pool->freeblock;
assert(bp != NULL);
if ((pool->freeblock = *(pyblock **)bp) != NULL) {
goto success;
}
/* There isn't a pool of the right size class immediately
* available: use a free pool.
/*
* Reached the end of the free list, try to extend it.
*/
if (_PyRuntime.mem.usable_arenas == NULL) {
/* No arena has a free pool: allocate a new arena. */
#ifdef WITH_MEMORY_LIMITS
if (_PyRuntime.mem.narenas_currently_allocated >= MAX_ARENAS) {
UNLOCK();
goto redirect;
}
#endif
_PyRuntime.mem.usable_arenas = new_arena();
if (_PyRuntime.mem.usable_arenas == NULL) {
UNLOCK();
goto redirect;
}
_PyRuntime.mem.usable_arenas->nextarena =
_PyRuntime.mem.usable_arenas->prevarena = NULL;
}
assert(_PyRuntime.mem.usable_arenas->address != 0);
/* Try to get a cached free pool. */
pool = _PyRuntime.mem.usable_arenas->freepools;
if (pool != NULL) {
/* Unlink from cached pools. */
_PyRuntime.mem.usable_arenas->freepools = pool->nextpool;
/* This arena already had the smallest nfreepools
* value, so decreasing nfreepools doesn't change
* that, and we don't need to rearrange the
* usable_arenas list. However, if the arena has
* become wholly allocated, we need to remove its
* arena_object from usable_arenas.
*/
--_PyRuntime.mem.usable_arenas->nfreepools;
if (_PyRuntime.mem.usable_arenas->nfreepools == 0) {
/* Wholly allocated: remove. */
assert(_PyRuntime.mem.usable_arenas->freepools == NULL);
assert(_PyRuntime.mem.usable_arenas->nextarena == NULL ||
_PyRuntime.mem.usable_arenas->nextarena->prevarena ==
_PyRuntime.mem.usable_arenas);
_PyRuntime.mem.usable_arenas = _PyRuntime.mem.usable_arenas->nextarena;
if (_PyRuntime.mem.usable_arenas != NULL) {
_PyRuntime.mem.usable_arenas->prevarena = NULL;
assert(_PyRuntime.mem.usable_arenas->address != 0);
}
}
else {
/* nfreepools > 0: it must be that freepools
* isn't NULL, or that we haven't yet carved
* off all the arena's pools for the first
* time.
*/
assert(_PyRuntime.mem.usable_arenas->freepools != NULL ||
_PyRuntime.mem.usable_arenas->pool_address <=
(pyblock*)_PyRuntime.mem.usable_arenas->address +
ARENA_SIZE - POOL_SIZE);
}
init_pool:
/* Frontlink to used pools. */
next = _PyRuntime.mem.usedpools[size + size]; /* == prev */
pool->nextpool = next;
pool->prevpool = next;
next->nextpool = pool;
next->prevpool = pool;
pool->ref.count = 1;
if (pool->szidx == size) {
/* Luckily, this pool last contained blocks
* of the same size class, so its header
* and free list are already initialized.
*/
bp = pool->freeblock;
assert(bp != NULL);
pool->freeblock = *(pyblock **)bp;
UNLOCK();
if (use_calloc)
memset(bp, 0, nbytes);
return (void *)bp;
}
/*
* Initialize the pool header, set up the free list to
* contain just the second block, and return the first
* block.
*/
pool->szidx = size;
size = INDEX2SIZE(size);
bp = (pyblock *)pool + POOL_OVERHEAD;
pool->nextoffset = POOL_OVERHEAD + (size << 1);
pool->maxnextoffset = POOL_SIZE - size;
pool->freeblock = bp + size;
if (pool->nextoffset <= pool->maxnextoffset) {
/* There is room for another block. */
pool->freeblock = (pyblock*)pool +
pool->nextoffset;
pool->nextoffset += INDEX2SIZE(size);
*(pyblock **)(pool->freeblock) = NULL;
UNLOCK();
if (use_calloc)
memset(bp, 0, nbytes);
return (void *)bp;
goto success;
}
/* Carve off a new pool. */
assert(_PyRuntime.mem.usable_arenas->nfreepools > 0);
assert(_PyRuntime.mem.usable_arenas->freepools == NULL);
pool = (poolp)_PyRuntime.mem.usable_arenas->pool_address;
assert((pyblock*)pool <= (pyblock*)_PyRuntime.mem.usable_arenas->address +
ARENA_SIZE - POOL_SIZE);
pool->arenaindex = (uint)(_PyRuntime.mem.usable_arenas - _PyRuntime.mem.arenas);
assert(&_PyRuntime.mem.arenas[pool->arenaindex] == _PyRuntime.mem.usable_arenas);
pool->szidx = DUMMY_SIZE_IDX;
_PyRuntime.mem.usable_arenas->pool_address += POOL_SIZE;
--_PyRuntime.mem.usable_arenas->nfreepools;
/* Pool is full, unlink from used pools. */
next = pool->nextpool;
pool = pool->prevpool;
next->prevpool = pool;
pool->nextpool = next;
goto success;
}
/* There isn't a pool of the right size class immediately
* available: use a free pool.
*/
if (_PyRuntime.mem.usable_arenas == NULL) {
/* No arena has a free pool: allocate a new arena. */
#ifdef WITH_MEMORY_LIMITS
if (_PyRuntime.mem.narenas_currently_allocated >= MAX_ARENAS) {
goto failed;
}
#endif
_PyRuntime.mem.usable_arenas = new_arena();
if (_PyRuntime.mem.usable_arenas == NULL) {
goto failed;
}
_PyRuntime.mem.usable_arenas->nextarena =
_PyRuntime.mem.usable_arenas->prevarena = NULL;
}
assert(_PyRuntime.mem.usable_arenas->address != 0);
/* Try to get a cached free pool. */
pool = _PyRuntime.mem.usable_arenas->freepools;
if (pool != NULL) {
/* Unlink from cached pools. */
_PyRuntime.mem.usable_arenas->freepools = pool->nextpool;
/* This arena already had the smallest nfreepools
* value, so decreasing nfreepools doesn't change
* that, and we don't need to rearrange the
* usable_arenas list. However, if the arena has
* become wholly allocated, we need to remove its
* arena_object from usable_arenas.
*/
--_PyRuntime.mem.usable_arenas->nfreepools;
if (_PyRuntime.mem.usable_arenas->nfreepools == 0) {
/* Wholly allocated: remove. */
assert(_PyRuntime.mem.usable_arenas->freepools == NULL);
assert(_PyRuntime.mem.usable_arenas->nextarena == NULL ||
_PyRuntime.mem.usable_arenas->nextarena->prevarena ==
_PyRuntime.mem.usable_arenas);
/* Unlink the arena: it is completely allocated. */
_PyRuntime.mem.usable_arenas = _PyRuntime.mem.usable_arenas->nextarena;
if (_PyRuntime.mem.usable_arenas != NULL) {
_PyRuntime.mem.usable_arenas->prevarena = NULL;
assert(_PyRuntime.mem.usable_arenas->address != 0);
}
}
else {
/* nfreepools > 0: it must be that freepools
* isn't NULL, or that we haven't yet carved
* off all the arena's pools for the first
* time.
*/
assert(_PyRuntime.mem.usable_arenas->freepools != NULL ||
_PyRuntime.mem.usable_arenas->pool_address <=
(pyblock*)_PyRuntime.mem.usable_arenas->address +
ARENA_SIZE - POOL_SIZE);
}
goto init_pool;
init_pool:
/* Frontlink to used pools. */
next = _PyRuntime.mem.usedpools[size + size]; /* == prev */
pool->nextpool = next;
pool->prevpool = next;
next->nextpool = pool;
next->prevpool = pool;
pool->ref.count = 1;
if (pool->szidx == size) {
/* Luckily, this pool last contained blocks
* of the same size class, so its header
* and free list are already initialized.
*/
bp = pool->freeblock;
assert(bp != NULL);
pool->freeblock = *(pyblock **)bp;
goto success;
}
/*
* Initialize the pool header, set up the free list to
* contain just the second block, and return the first
* block.
*/
pool->szidx = size;
size = INDEX2SIZE(size);
bp = (pyblock *)pool + POOL_OVERHEAD;
pool->nextoffset = POOL_OVERHEAD + (size << 1);
pool->maxnextoffset = POOL_SIZE - size;
pool->freeblock = bp + size;
*(pyblock **)(pool->freeblock) = NULL;
goto success;
}
/* The small block allocator ends here. */
redirect:
/* Redirect the original request to the underlying (libc) allocator.
* We jump here on bigger requests, on error in the code above (as a
* last chance to serve the request) or when the max memory limit
* has been reached.
*/
{
void *result;
if (use_calloc)
result = PyMem_RawCalloc(nelem, elsize);
else
result = PyMem_RawMalloc(nbytes);
if (!result)
_PyRuntime.mem.num_allocated_blocks--;
return result;
/* Carve off a new pool. */
assert(_PyRuntime.mem.usable_arenas->nfreepools > 0);
assert(_PyRuntime.mem.usable_arenas->freepools == NULL);
pool = (poolp)_PyRuntime.mem.usable_arenas->pool_address;
assert((pyblock*)pool <= (pyblock*)_PyRuntime.mem.usable_arenas->address +
ARENA_SIZE - POOL_SIZE);
pool->arenaindex = (uint)(_PyRuntime.mem.usable_arenas - _PyRuntime.mem.arenas);
assert(&_PyRuntime.mem.arenas[pool->arenaindex] == _PyRuntime.mem.usable_arenas);
pool->szidx = DUMMY_SIZE_IDX;
_PyRuntime.mem.usable_arenas->pool_address += POOL_SIZE;
--_PyRuntime.mem.usable_arenas->nfreepools;
if (_PyRuntime.mem.usable_arenas->nfreepools == 0) {
assert(_PyRuntime.mem.usable_arenas->nextarena == NULL ||
_PyRuntime.mem.usable_arenas->nextarena->prevarena ==
_PyRuntime.mem.usable_arenas);
/* Unlink the arena: it is completely allocated. */
_PyRuntime.mem.usable_arenas = _PyRuntime.mem.usable_arenas->nextarena;
if (_PyRuntime.mem.usable_arenas != NULL) {
_PyRuntime.mem.usable_arenas->prevarena = NULL;
assert(_PyRuntime.mem.usable_arenas->address != 0);
}
}
goto init_pool;
success:
UNLOCK();
assert(bp != NULL);
*ptr_p = (void *)bp;
return 1;
failed:
UNLOCK();
return 0;
}
static void *
_PyObject_Malloc(void *ctx, size_t nbytes)
{
return _PyObject_Alloc(0, ctx, 1, nbytes);
void* ptr;
if (pymalloc_alloc(ctx, &ptr, nbytes)) {
_PyRuntime.mem.num_allocated_blocks++;
return ptr;
}
ptr = PyMem_RawMalloc(nbytes);
if (ptr != NULL) {
_PyRuntime.mem.num_allocated_blocks++;
}
return ptr;
}
static void *
_PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
{
return _PyObject_Alloc(1, ctx, nelem, elsize);
void* ptr;
assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
size_t nbytes = nelem * elsize;
if (pymalloc_alloc(ctx, &ptr, nbytes)) {
memset(ptr, 0, nbytes);
_PyRuntime.mem.num_allocated_blocks++;
return ptr;
}
ptr = PyMem_RawCalloc(nelem, elsize);
if (ptr != NULL) {
_PyRuntime.mem.num_allocated_blocks++;
}
return ptr;
}
/* free */
static void
_PyObject_Free(void *ctx, void *p)
/* Free a memory block allocated by pymalloc_alloc().
Return 1 if it was freed.
Return 0 if the block was not allocated by pymalloc_alloc(). */
static int
pymalloc_free(void *ctx, void *p)
{
poolp pool;
pyblock *lastfree;
poolp next, prev;
uint size;
if (p == NULL) /* free(NULL) has no effect */
return;
_PyRuntime.mem.num_allocated_blocks--;
assert(p != NULL);
#ifdef WITH_VALGRIND
if (UNLIKELY(running_on_valgrind > 0))
goto redirect;
if (UNLIKELY(running_on_valgrind > 0)) {
return 0;
}
#endif
pool = POOL_ADDR(p);
if (address_in_range(p, pool)) {
/* We allocated this address. */
LOCK();
/* Link p to the start of the pool's freeblock list. Since
* the pool had at least the p block outstanding, the pool
* wasn't empty (so it's already in a usedpools[] list, or
* was full and is in no list -- it's not in the freeblocks
* list in any case).
*/
assert(pool->ref.count > 0); /* else it was empty */
*(pyblock **)p = lastfree = pool->freeblock;
pool->freeblock = (pyblock *)p;
if (lastfree) {
struct arena_object* ao;
uint nf; /* ao->nfreepools */
/* freeblock wasn't NULL, so the pool wasn't full,
* and the pool is in a usedpools[] list.
*/
if (--pool->ref.count != 0) {
/* pool isn't empty: leave it in usedpools */
UNLOCK();
return;
}
/* Pool is now empty: unlink from usedpools, and
* link to the front of freepools. This ensures that
* previously freed pools will be allocated later
* (being not referenced, they are perhaps paged out).
*/
next = pool->nextpool;
prev = pool->prevpool;
next->prevpool = prev;
prev->nextpool = next;
/* Link the pool to freepools. This is a singly-linked
* list, and pool->prevpool isn't used there.
*/
ao = &_PyRuntime.mem.arenas[pool->arenaindex];
pool->nextpool = ao->freepools;
ao->freepools = pool;
nf = ++ao->nfreepools;
/* All the rest is arena management. We just freed
* a pool, and there are 4 cases for arena mgmt:
* 1. If all the pools are free, return the arena to
* the system free().
* 2. If this is the only free pool in the arena,
* add the arena back to the `usable_arenas` list.
* 3. If the "next" arena has a smaller count of free
* pools, we have to "slide this arena right" to
* restore that usable_arenas is sorted in order of
* nfreepools.
* 4. Else there's nothing more to do.
*/
if (nf == ao->ntotalpools) {
/* Case 1. First unlink ao from usable_arenas.
*/
assert(ao->prevarena == NULL ||
ao->prevarena->address != 0);
assert(ao ->nextarena == NULL ||
ao->nextarena->address != 0);
/* Fix the pointer in the prevarena, or the
* usable_arenas pointer.
*/
if (ao->prevarena == NULL) {
_PyRuntime.mem.usable_arenas = ao->nextarena;
assert(_PyRuntime.mem.usable_arenas == NULL ||
_PyRuntime.mem.usable_arenas->address != 0);
}
else {
assert(ao->prevarena->nextarena == ao);
ao->prevarena->nextarena =
ao->nextarena;
}
/* Fix the pointer in the nextarena. */
if (ao->nextarena != NULL) {
assert(ao->nextarena->prevarena == ao);
ao->nextarena->prevarena =
ao->prevarena;
}
/* Record that this arena_object slot is
* available to be reused.
*/
ao->nextarena = _PyRuntime.mem.unused_arena_objects;
_PyRuntime.mem.unused_arena_objects = ao;
/* Free the entire arena. */
_PyRuntime.obj.allocator_arenas.free(_PyRuntime.obj.allocator_arenas.ctx,
(void *)ao->address, ARENA_SIZE);
ao->address = 0; /* mark unassociated */
--_PyRuntime.mem.narenas_currently_allocated;
UNLOCK();
return;
}
if (nf == 1) {
/* Case 2. Put ao at the head of
* usable_arenas. Note that because
* ao->nfreepools was 0 before, ao isn't
* currently on the usable_arenas list.
*/
ao->nextarena = _PyRuntime.mem.usable_arenas;
ao->prevarena = NULL;
if (_PyRuntime.mem.usable_arenas)
_PyRuntime.mem.usable_arenas->prevarena = ao;
_PyRuntime.mem.usable_arenas = ao;
assert(_PyRuntime.mem.usable_arenas->address != 0);
UNLOCK();
return;
}
/* If this arena is now out of order, we need to keep
* the list sorted. The list is kept sorted so that
* the "most full" arenas are used first, which allows
* the nearly empty arenas to be completely freed. In
* a few un-scientific tests, it seems like this
* approach allowed a lot more memory to be freed.
*/
if (ao->nextarena == NULL ||
nf <= ao->nextarena->nfreepools) {
/* Case 4. Nothing to do. */
UNLOCK();
return;
}
/* Case 3: We have to move the arena towards the end
* of the list, because it has more free pools than
* the arena to its right.
* First unlink ao from usable_arenas.
*/
if (ao->prevarena != NULL) {
/* ao isn't at the head of the list */
assert(ao->prevarena->nextarena == ao);
ao->prevarena->nextarena = ao->nextarena;
}
else {
/* ao is at the head of the list */
assert(_PyRuntime.mem.usable_arenas == ao);
_PyRuntime.mem.usable_arenas = ao->nextarena;
}
ao->nextarena->prevarena = ao->prevarena;
if (!address_in_range(p, pool)) {
return 0;
}
/* We allocated this address. */
/* Locate the new insertion point by iterating over
* the list, using our nextarena pointer.
*/
while (ao->nextarena != NULL &&
nf > ao->nextarena->nfreepools) {
ao->prevarena = ao->nextarena;
ao->nextarena = ao->nextarena->nextarena;
}
LOCK();
/* Insert ao at this point. */
assert(ao->nextarena == NULL ||
ao->prevarena == ao->nextarena->prevarena);
assert(ao->prevarena->nextarena == ao->nextarena);
ao->prevarena->nextarena = ao;
if (ao->nextarena != NULL)
ao->nextarena->prevarena = ao;
/* Verify that the swaps worked. */
assert(ao->nextarena == NULL ||
nf <= ao->nextarena->nfreepools);
assert(ao->prevarena == NULL ||
nf > ao->prevarena->nfreepools);
assert(ao->nextarena == NULL ||
ao->nextarena->prevarena == ao);
assert((_PyRuntime.mem.usable_arenas == ao &&
ao->prevarena == NULL) ||
ao->prevarena->nextarena == ao);
UNLOCK();
return;
}
/* Link p to the start of the pool's freeblock list. Since
* the pool had at least the p block outstanding, the pool
* wasn't empty (so it's already in a usedpools[] list, or
* was full and is in no list -- it's not in the freeblocks
* list in any case).
*/
assert(pool->ref.count > 0); /* else it was empty */
*(pyblock **)p = lastfree = pool->freeblock;
pool->freeblock = (pyblock *)p;
if (!lastfree) {
/* Pool was full, so doesn't currently live in any list:
* link it to the front of the appropriate usedpools[] list.
* This mimics LRU pool usage for new allocations and
@@ -1197,93 +1048,274 @@ _PyObject_Free(void *ctx, void *p)
size = pool->szidx;
next = _PyRuntime.mem.usedpools[size + size];
prev = next->prevpool;
/* insert pool before next: prev <-> pool <-> next */
pool->nextpool = next;
pool->prevpool = prev;
next->prevpool = pool;
prev->nextpool = pool;
UNLOCK();
goto success;
}
struct arena_object* ao;
uint nf; /* ao->nfreepools */
/* freeblock wasn't NULL, so the pool wasn't full,
* and the pool is in a usedpools[] list.
*/
if (--pool->ref.count != 0) {
/* pool isn't empty: leave it in usedpools */
goto success;
}
/* Pool is now empty: unlink from usedpools, and
* link to the front of freepools. This ensures that
* previously freed pools will be allocated later
* (being not referenced, they are perhaps paged out).
*/
next = pool->nextpool;
prev = pool->prevpool;
next->prevpool = prev;
prev->nextpool = next;
/* Link the pool to freepools. This is a singly-linked
* list, and pool->prevpool isn't used there.
*/
ao = &_PyRuntime.mem.arenas[pool->arenaindex];
pool->nextpool = ao->freepools;
ao->freepools = pool;
nf = ++ao->nfreepools;
/* All the rest is arena management. We just freed
* a pool, and there are 4 cases for arena mgmt:
* 1. If all the pools are free, return the arena to
* the system free().
* 2. If this is the only free pool in the arena,
* add the arena back to the `usable_arenas` list.
* 3. If the "next" arena has a smaller count of free
* pools, we have to "slide this arena right" to
* restore that usable_arenas is sorted in order of
* nfreepools.
* 4. Else there's nothing more to do.
*/
if (nf == ao->ntotalpools) {
/* Case 1. First unlink ao from usable_arenas.
*/
assert(ao->prevarena == NULL ||
ao->prevarena->address != 0);
assert(ao ->nextarena == NULL ||
ao->nextarena->address != 0);
/* Fix the pointer in the prevarena, or the
* usable_arenas pointer.
*/
if (ao->prevarena == NULL) {
_PyRuntime.mem.usable_arenas = ao->nextarena;
assert(_PyRuntime.mem.usable_arenas == NULL ||
_PyRuntime.mem.usable_arenas->address != 0);
}
else {
assert(ao->prevarena->nextarena == ao);
ao->prevarena->nextarena =
ao->nextarena;
}
/* Fix the pointer in the nextarena. */
if (ao->nextarena != NULL) {
assert(ao->nextarena->prevarena == ao);
ao->nextarena->prevarena =
ao->prevarena;
}
/* Record that this arena_object slot is
* available to be reused.
*/
ao->nextarena = _PyRuntime.mem.unused_arena_objects;
_PyRuntime.mem.unused_arena_objects = ao;
/* Free the entire arena. */
_PyRuntime.obj.allocator_arenas.free(_PyRuntime.obj.allocator_arenas.ctx,
(void *)ao->address, ARENA_SIZE);
ao->address = 0; /* mark unassociated */
--_PyRuntime.mem.narenas_currently_allocated;
goto success;
}
if (nf == 1) {
/* Case 2. Put ao at the head of
* usable_arenas. Note that because
* ao->nfreepools was 0 before, ao isn't
* currently on the usable_arenas list.
*/
ao->nextarena = _PyRuntime.mem.usable_arenas;
ao->prevarena = NULL;
if (_PyRuntime.mem.usable_arenas)
_PyRuntime.mem.usable_arenas->prevarena = ao;
_PyRuntime.mem.usable_arenas = ao;
assert(_PyRuntime.mem.usable_arenas->address != 0);
goto success;
}
/* If this arena is now out of order, we need to keep
* the list sorted. The list is kept sorted so that
* the "most full" arenas are used first, which allows
* the nearly empty arenas to be completely freed. In
* a few un-scientific tests, it seems like this
* approach allowed a lot more memory to be freed.
*/
if (ao->nextarena == NULL ||
nf <= ao->nextarena->nfreepools) {
/* Case 4. Nothing to do. */
goto success;
}
/* Case 3: We have to move the arena towards the end
* of the list, because it has more free pools than
* the arena to its right.
* First unlink ao from usable_arenas.
*/
if (ao->prevarena != NULL) {
/* ao isn't at the head of the list */
assert(ao->prevarena->nextarena == ao);
ao->prevarena->nextarena = ao->nextarena;
}
else {
/* ao is at the head of the list */
assert(_PyRuntime.mem.usable_arenas == ao);
_PyRuntime.mem.usable_arenas = ao->nextarena;
}
ao->nextarena->prevarena = ao->prevarena;
/* Locate the new insertion point by iterating over
* the list, using our nextarena pointer.
*/
while (ao->nextarena != NULL && nf > ao->nextarena->nfreepools) {
ao->prevarena = ao->nextarena;
ao->nextarena = ao->nextarena->nextarena;
}
/* Insert ao at this point. */
assert(ao->nextarena == NULL || ao->prevarena == ao->nextarena->prevarena);
assert(ao->prevarena->nextarena == ao->nextarena);
ao->prevarena->nextarena = ao;
if (ao->nextarena != NULL) {
ao->nextarena->prevarena = ao;
}
/* Verify that the swaps worked. */
assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
assert((_PyRuntime.mem.usable_arenas == ao && ao->prevarena == NULL)
|| ao->prevarena->nextarena == ao);
goto success;
success:
UNLOCK();
return 1;
}
static void
_PyObject_Free(void *ctx, void *p)
{
/* PyObject_Free(NULL) has no effect */
if (p == NULL) {
return;
}
#ifdef WITH_VALGRIND
redirect:
#endif
/* We didn't allocate this address. */
PyMem_RawFree(p);
_PyRuntime.mem.num_allocated_blocks--;
if (!pymalloc_free(ctx, p)) {
/* pymalloc didn't allocate this address */
PyMem_RawFree(p);
}
}
/* realloc. If p is NULL, this acts like malloc(nbytes). Else if nbytes==0,
* then as the Python docs promise, we do not treat this like free(p), and
* return a non-NULL result.
*/
static void *
_PyObject_Realloc(void *ctx, void *p, size_t nbytes)
/* pymalloc realloc.
If nbytes==0, then as the Python docs promise, we do not treat this like
free(p), and return a non-NULL result.
Return 1 if pymalloc reallocated memory and wrote the new pointer into
newptr_p.
Return 0 if pymalloc didn't allocate p. */
static int
pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes)
{
void *bp;
poolp pool;
size_t size;
if (p == NULL)
return _PyObject_Alloc(0, ctx, 1, nbytes);
assert(p != NULL);
#ifdef WITH_VALGRIND
/* Treat running_on_valgrind == -1 the same as 0 */
if (UNLIKELY(running_on_valgrind > 0))
goto redirect;
if (UNLIKELY(running_on_valgrind > 0)) {
return 0;
}
#endif
pool = POOL_ADDR(p);
if (address_in_range(p, pool)) {
/* We're in charge of this block */
size = INDEX2SIZE(pool->szidx);
if (nbytes <= size) {
/* The block is staying the same or shrinking. If
* it's shrinking, there's a tradeoff: it costs
* cycles to copy the block to a smaller size class,
* but it wastes memory not to copy it. The
* compromise here is to copy on shrink only if at
* least 25% of size can be shaved off.
*/
if (4 * nbytes > 3 * size) {
/* It's the same,
* or shrinking and new/old > 3/4.
*/
return p;
}
size = nbytes;
}
bp = _PyObject_Alloc(0, ctx, 1, nbytes);
if (bp != NULL) {
memcpy(bp, p, size);
_PyObject_Free(ctx, p);
if (!address_in_range(p, pool)) {
/* pymalloc is not managing this block.
If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
over this block. However, if we do, we need to copy the valid data
from the C-managed block to one of our blocks, and there's no
portable way to know how much of the memory space starting at p is
valid.
As bug 1185883 pointed out the hard way, it's possible that the
C-managed block is "at the end" of allocated VM space, so that a
memory fault can occur if we try to copy nbytes bytes starting at p.
Instead we punt: let C continue to manage this block. */
return 0;
}
/* pymalloc is in charge of this block */
size = INDEX2SIZE(pool->szidx);
if (nbytes <= size) {
/* The block is staying the same or shrinking.
If it's shrinking, there's a tradeoff: it costs cycles to copy the
block to a smaller size class, but it wastes memory not to copy it.
The compromise here is to copy on shrink only if at least 25% of
size can be shaved off. */
if (4 * nbytes > 3 * size) {
/* It's the same, or shrinking and new/old > 3/4. */
*newptr_p = p;
return 1;
}
return bp;
size = nbytes;
}
#ifdef WITH_VALGRIND
redirect:
#endif
/* We're not managing this block. If nbytes <=
* SMALL_REQUEST_THRESHOLD, it's tempting to try to take over this
* block. However, if we do, we need to copy the valid data from
* the C-managed block to one of our blocks, and there's no portable
* way to know how much of the memory space starting at p is valid.
* As bug 1185883 pointed out the hard way, it's possible that the
* C-managed block is "at the end" of allocated VM space, so that
* a memory fault can occur if we try to copy nbytes bytes starting
* at p. Instead we punt: let C continue to manage this block.
*/
if (nbytes)
return PyMem_RawRealloc(p, nbytes);
/* C doesn't define the result of realloc(p, 0) (it may or may not
* return NULL then), but Python's docs promise that nbytes==0 never
* returns NULL. We don't pass 0 to realloc(), to avoid that endcase
* to begin with. Even then, we can't be sure that realloc() won't
* return NULL.
*/
bp = PyMem_RawRealloc(p, 1);
return bp ? bp : p;
bp = _PyObject_Malloc(ctx, nbytes);
if (bp != NULL) {
memcpy(bp, p, size);
_PyObject_Free(ctx, p);
}
*newptr_p = bp;
return 1;
}
static void *
_PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
{
void *ptr2;
if (ptr == NULL) {
return _PyObject_Malloc(ctx, nbytes);
}
if (pymalloc_realloc(ctx, &ptr2, ptr, nbytes)) {
return ptr2;
}
return PyMem_RawRealloc(ptr, nbytes);
}
#else /* ! WITH_PYMALLOC */
@@ -1386,37 +1418,53 @@ static void *
_PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
{
debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
uint8_t *p; /* base address of malloc'ed block */
uint8_t *tail; /* p + 2*SST + nbytes == pointer to tail pad bytes */
size_t total; /* nbytes + 4*SST */
uint8_t *p; /* base address of malloc'ed pad block */
uint8_t *data; /* p + 2*SST == pointer to data bytes */
uint8_t *tail; /* data + nbytes == pointer to tail pad bytes */
size_t total; /* 2 * SST + nbytes + 2 * SST */
bumpserialno();
total = nbytes + 4*SST;
if (nbytes > PY_SSIZE_T_MAX - 4*SST)
/* overflow: can't represent total as a Py_ssize_t */
if (nbytes > (size_t)PY_SSIZE_T_MAX - 4 * SST) {
/* integer overflow: can't represent total as a Py_ssize_t */
return NULL;
}
total = nbytes + 4 * SST;
if (use_calloc)
/* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
* ^--- p ^--- data ^--- tail
S: nbytes stored as size_t
I: API identifier (1 byte)
F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
C: Clean bytes used later to store actual data
N: Serial number stored as size_t */
if (use_calloc) {
p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
else
}
else {
p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
if (p == NULL)
}
if (p == NULL) {
return NULL;
}
data = p + 2*SST;
bumpserialno();
/* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
write_size_t(p, nbytes);
p[SST] = (uint8_t)api->api_id;
memset(p + SST + 1, FORBIDDENBYTE, SST-1);
if (nbytes > 0 && !use_calloc)
memset(p + 2*SST, CLEANBYTE, nbytes);
if (nbytes > 0 && !use_calloc) {
memset(data, CLEANBYTE, nbytes);
}
/* at tail, write pad (SST bytes) and serialno (SST bytes) */
tail = p + 2*SST + nbytes;
tail = data + nbytes;
memset(tail, FORBIDDENBYTE, SST);
write_size_t(tail + SST, _PyRuntime.mem.serialno);
return p + 2*SST;
return data;
}
static void *
@@ -1429,11 +1477,12 @@ static void *
_PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
{
size_t nbytes;
assert(elsize == 0 || nelem <= PY_SSIZE_T_MAX / elsize);
assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
nbytes = nelem * elsize;
return _PyMem_DebugRawAlloc(1, ctx, nbytes);
}
/* The debug free first checks the 2*SST bytes on each end for sanity (in
particular, that the FORBIDDENBYTEs with the api ID are still intact).
Then fills the original bytes with DEADBYTE.
@@ -1442,63 +1491,73 @@ _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
static void
_PyMem_DebugRawFree(void *ctx, void *p)
{
/* PyMem_Free(NULL) has no effect */
if (p == NULL) {
return;
}
debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
uint8_t *q = (uint8_t *)p - 2*SST; /* address returned from malloc */
size_t nbytes;
if (p == NULL)
return;
_PyMem_DebugCheckAddress(api->api_id, p);
nbytes = read_size_t(q);
nbytes += 4*SST;
if (nbytes > 0)
memset(q, DEADBYTE, nbytes);
nbytes += 4 * SST;
memset(q, DEADBYTE, nbytes);
api->alloc.free(api->alloc.ctx, q);
}
static void *
_PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
{
if (p == NULL) {
return _PyMem_DebugRawAlloc(0, ctx, nbytes);
}
debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
uint8_t *q = (uint8_t *)p;
uint8_t *tail;
size_t total; /* nbytes + 4*SST */
uint8_t *q; /* base address of malloc'ed pad block */
uint8_t *data; /* p + 2*SST == pointer to data bytes */
uint8_t *tail; /* data + nbytes == pointer to tail pad bytes */
size_t total; /* 2 * SST + nbytes + 2 * SST */
size_t original_nbytes;
int i;
if (p == NULL)
return _PyMem_DebugRawAlloc(0, ctx, nbytes);
_PyMem_DebugCheckAddress(api->api_id, p);
bumpserialno();
q = (uint8_t *)p;
original_nbytes = read_size_t(q - 2*SST);
total = nbytes + 4*SST;
if (nbytes > PY_SSIZE_T_MAX - 4*SST)
/* overflow: can't represent total as a Py_ssize_t */
if (nbytes > (size_t)PY_SSIZE_T_MAX - 4*SST) {
/* integer overflow: can't represent total as a Py_ssize_t */
return NULL;
}
total = nbytes + 4*SST;
/* Resize and add decorations. */
q = (uint8_t *)api->alloc.realloc(api->alloc.ctx, q - 2*SST, total);
if (q == NULL)
if (q == NULL) {
return NULL;
}
bumpserialno();
write_size_t(q, nbytes);
assert(q[SST] == (uint8_t)api->api_id);
for (i = 1; i < SST; ++i)
for (i = 1; i < SST; ++i) {
assert(q[SST + i] == FORBIDDENBYTE);
q += 2*SST;
}
data = q + 2*SST;
tail = q + nbytes;
tail = data + nbytes;
memset(tail, FORBIDDENBYTE, SST);
write_size_t(tail + SST, _PyRuntime.mem.serialno);
if (nbytes > original_nbytes) {
/* growing: mark new extra memory clean */
memset(q + original_nbytes, CLEANBYTE,
memset(data + original_nbytes, CLEANBYTE,
nbytes - original_nbytes);
}
return q;
return data;
}
static void
@@ -1523,6 +1582,7 @@ _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
}
static void
_PyMem_DebugFree(void *ctx, void *ptr)
{
@@ -1530,6 +1590,7 @@ _PyMem_DebugFree(void *ctx, void *ptr)
_PyMem_DebugRawFree(ctx, ptr);
}
static void *
_PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
{
......