accu.c 2.4 KB
Newer Older
1 2 3
/* Accumulator struct implementation */

#include "Python.h"
4
#include "accu.h"
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115

static PyObject *
join_list_unicode(PyObject *lst)
{
    /* return ''.join(lst) */
    PyObject *sep, *ret;
    sep = PyUnicode_FromStringAndSize("", 0);
    ret = PyUnicode_Join(sep, lst);
    Py_DECREF(sep);
    return ret;
}

int
_PyAccu_Init(_PyAccu *acc)
{
    /* Lazily allocated */
    acc->large = NULL;
    acc->small = PyList_New(0);
    if (acc->small == NULL)
        return -1;
    return 0;
}

static int
flush_accumulator(_PyAccu *acc)
{
    Py_ssize_t nsmall = PyList_GET_SIZE(acc->small);
    if (nsmall) {
        int ret;
        PyObject *joined;
        if (acc->large == NULL) {
            acc->large = PyList_New(0);
            if (acc->large == NULL)
                return -1;
        }
        joined = join_list_unicode(acc->small);
        if (joined == NULL)
            return -1;
        if (PyList_SetSlice(acc->small, 0, nsmall, NULL)) {
            Py_DECREF(joined);
            return -1;
        }
        ret = PyList_Append(acc->large, joined);
        Py_DECREF(joined);
        return ret;
    }
    return 0;
}

int
_PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode)
{
    Py_ssize_t nsmall;
    assert(PyUnicode_Check(unicode));

    if (PyList_Append(acc->small, unicode))
        return -1;
    nsmall = PyList_GET_SIZE(acc->small);
    /* Each item in a list of unicode objects has an overhead (in 64-bit
     * builds) of:
     *   - 8 bytes for the list slot
     *   - 56 bytes for the header of the unicode object
     * that is, 64 bytes.  100000 such objects waste more than 6MB
     * compared to a single concatenated string.
     */
    if (nsmall < 100000)
        return 0;
    return flush_accumulator(acc);
}

PyObject *
_PyAccu_FinishAsList(_PyAccu *acc)
{
    int ret;
    PyObject *res;

    ret = flush_accumulator(acc);
    Py_CLEAR(acc->small);
    if (ret) {
        Py_CLEAR(acc->large);
        return NULL;
    }
    res = acc->large;
    acc->large = NULL;
    return res;
}

PyObject *
_PyAccu_Finish(_PyAccu *acc)
{
    PyObject *list, *res;
    if (acc->large == NULL) {
        list = acc->small;
        acc->small = NULL;
    }
    else {
        list = _PyAccu_FinishAsList(acc);
        if (!list)
            return NULL;
    }
    res = join_list_unicode(list);
    Py_DECREF(list);
    return res;
}

void
_PyAccu_Destroy(_PyAccu *acc)
{
    Py_CLEAR(acc->small);
    Py_CLEAR(acc->large);
}