stringio.c 30.4 KB
Newer Older
1
#define PY_SSIZE_T_CLEAN
2
#include "Python.h"
3
#include "structmember.h"
4
#include "accu.h"
5
#include "_iomodule.h"
6

7 8 9
/* Implementation note: the buffer is always at least one character longer
   than the enclosed string, for proper functioning of _PyIO_find_line_ending.
*/
10

11 12 13
#define STATE_REALIZED 1
#define STATE_ACCUMULATING 2

14 15
typedef struct {
    PyObject_HEAD
Martin v. Löwis's avatar
Martin v. Löwis committed
16
    Py_UCS4 *buf;
17 18 19
    Py_ssize_t pos;
    Py_ssize_t string_size;
    size_t buf_size;
20

21 22 23 24 25 26 27 28 29
    /* The stringio object can be in two states: accumulating or realized.
       In accumulating state, the internal buffer contains nothing and
       the contents are given by the embedded _PyAccu structure.
       In realized state, the internal buffer is meaningful and the
       _PyAccu is destroyed.
    */
    int state;
    _PyAccu accu;

30 31 32 33 34 35 36
    char ok; /* initialized? */
    char closed;
    char readuniversal;
    char readtranslate;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *writenl;
Martin v. Löwis's avatar
Martin v. Löwis committed
37

38 39
    PyObject *dict;
    PyObject *weakreflist;
40
} stringio;
41

42 43 44 45 46 47 48 49 50 51 52 53 54 55
/* Make the enclosing function return NULL with a ValueError set when the
   object has not been (successfully) initialized.
   The body is wrapped in do { } while (0) so that the macro expands to a
   single statement and cannot capture a following `else`; the argument is
   parenthesized so any pointer expression may be passed.  Use it as a
   statement terminated by a semicolon. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)

/* Make the enclosing function return NULL with a ValueError set when the
   object has been closed.
   The body is wrapped in do { } while (0) so that the macro expands to a
   single statement and cannot capture a following `else`; the argument is
   parenthesized so any pointer expression may be passed.  Use it as a
   statement terminated by a semicolon. */
#define CHECK_CLOSED(self) \
    do { \
        if ((self)->closed) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on closed file"); \
            return NULL; \
        } \
    } while (0)

56 57 58 59 60
/* Switch the object to realized state, making the enclosing function
   return NULL if realize() reports an error.
   The body is wrapped in do { } while (0) so that the macro expands to a
   single statement and cannot capture a following `else`.  Use it as a
   statement terminated by a semicolon. */
#define ENSURE_REALIZED(self) \
    do { \
        if (realize(self) < 0) { \
            return NULL; \
        } \
    } while (0)

61 62 63 64 65 66
/* Documentation string for StringIO objects; it describes the two
   constructor arguments (initial_value and newline). */
PyDoc_STRVAR(stringio_doc,
    "Text I/O implementation using an in-memory buffer.\n"
    "\n"
    "The initial_value argument sets the value of object.  The newline\n"
    "argument is like the one of TextIOWrapper's constructor.");

67 68 69 70 71

/* Internal routine for changing the size, in terms of characters, of the
   buffer of StringIO objects.  The caller should ensure that the 'size'
   argument is non-negative.  Returns 0 on success, -1 otherwise. */
static int
72
resize_buffer(stringio *self, size_t size)
73 74 75 76
{
    /* Here, unsigned types are used to avoid dealing with signed integer
       overflow, which is undefined in C. */
    size_t alloc = self->buf_size;
Martin v. Löwis's avatar
Martin v. Löwis committed
77
    Py_UCS4 *new_buf = NULL;
78 79 80

    assert(self->buf != NULL);

81 82
    /* Reserve one more char for line ending detection. */
    size = size + 1;
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104
    /* For simplicity, stay in the range of the signed type. Anyway, Python
       doesn't allow strings to be longer than this. */
    if (size > PY_SSIZE_T_MAX)
        goto overflow;

    if (size < alloc / 2) {
        /* Major downsize; resize down to exact size. */
        alloc = size + 1;
    }
    else if (size < alloc) {
        /* Within allocated size; quick exit */
        return 0;
    }
    else if (size <= alloc * 1.125) {
        /* Moderate upsize; overallocate similar to list_resize() */
        alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
    }
    else {
        /* Major upsize; resize up to exact size */
        alloc = size + 1;
    }

Martin v. Löwis's avatar
Martin v. Löwis committed
105
    if (alloc > PY_SIZE_MAX / sizeof(Py_UCS4))
106
        goto overflow;
Martin v. Löwis's avatar
Martin v. Löwis committed
107
    new_buf = (Py_UCS4 *)PyMem_Realloc(self->buf, alloc * sizeof(Py_UCS4));
108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
    if (new_buf == NULL) {
        PyErr_NoMemory();
        return -1;
    }
    self->buf_size = alloc;
    self->buf = new_buf;

    return 0;

  overflow:
    PyErr_SetString(PyExc_OverflowError,
                    "new buffer size too large");
    return -1;
}

123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
/* Return the accumulated contents as a new string while staying in
   accumulating state: the accumulator is finished, then re-created and
   seeded with the resulting string.

   Returns a new reference, or NULL on error.  On error the object is left
   marked as realized. */
static PyObject *
make_intermediate(stringio *self)
{
    PyObject *joined;

    joined = _PyAccu_Finish(&self->accu);
    /* The accumulator is gone from here on, whatever happens next. */
    self->state = STATE_REALIZED;
    if (joined == NULL)
        return NULL;

    if (_PyAccu_Init(&self->accu))
        goto error;
    if (_PyAccu_Accumulate(&self->accu, joined))
        goto error;

    self->state = STATE_ACCUMULATING;
    return joined;

error:
    Py_DECREF(joined);
    return NULL;
}

/* Move the object from accumulating to realized state by copying the
   accumulated string into the internal character buffer.  Does nothing if
   the object is already realized.

   Returns 0 on success, -1 on error (with an exception set). */
static int
realize(stringio *self)
{
    PyObject *joined;
    Py_ssize_t nchars;
    int status = 0;

    if (self->state == STATE_REALIZED)
        return 0;
    assert(self->state == STATE_ACCUMULATING);
    self->state = STATE_REALIZED;

    joined = _PyAccu_Finish(&self->accu);
    if (joined == NULL)
        return -1;

    /* Copy the accumulated string into the internal buffer.  Its length
       should be equal to the current cursor position. */
    nchars = PyUnicode_GET_LENGTH(joined);
    if (resize_buffer(self, nchars) < 0 ||
        !PyUnicode_AsUCS4(joined, self->buf, nchars, 0)) {
        status = -1;
    }

    Py_DECREF(joined);
    return status;
}

171 172
/* Internal routine for writing a whole PyUnicode object to the buffer of a
   StringIO object. Returns 0 on success, or -1 on error. */
173
static Py_ssize_t
174
write_str(stringio *self, PyObject *obj)
175
{
176 177
    Py_ssize_t len;
    PyObject *decoded = NULL;
Martin v. Löwis's avatar
Martin v. Löwis committed
178

179 180
    assert(self->buf != NULL);
    assert(self->pos >= 0);
181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199

    if (self->decoder != NULL) {
        decoded = _PyIncrementalNewlineDecoder_decode(
            self->decoder, obj, 1 /* always final */);
    }
    else {
        decoded = obj;
        Py_INCREF(decoded);
    }
    if (self->writenl) {
        PyObject *translated = PyUnicode_Replace(
            decoded, _PyIO_str_nl, self->writenl, -1);
        Py_DECREF(decoded);
        decoded = translated;
    }
    if (decoded == NULL)
        return -1;

    assert(PyUnicode_Check(decoded));
200 201 202 203
    if (PyUnicode_READY(decoded)) {
        Py_DECREF(decoded);
        return -1;
    }
Martin v. Löwis's avatar
Martin v. Löwis committed
204
    len = PyUnicode_GET_LENGTH(decoded);
205 206 207 208 209 210 211 212
    assert(len >= 0);

    /* This overflow check is not strictly necessary. However, it avoids us to
       deal with funky things like comparing an unsigned and a signed
       integer. */
    if (self->pos > PY_SSIZE_T_MAX - len) {
        PyErr_SetString(PyExc_OverflowError,
                        "new position too large");
213
        goto fail;
214
    }
215 216 217 218 219 220 221 222 223 224 225

    if (self->state == STATE_ACCUMULATING) {
        if (self->string_size == self->pos) {
            if (_PyAccu_Accumulate(&self->accu, decoded))
                goto fail;
            goto success;
        }
        if (realize(self))
            goto fail;
    }

226 227
    if (self->pos + len > self->string_size) {
        if (resize_buffer(self, self->pos + len) < 0)
228
            goto fail;
229 230 231 232 233 234 235 236 237
    }

    if (self->pos > self->string_size) {
        /* In case of overseek, pad with null bytes the buffer region between
           the end of stream and the current position.

          0   lo      string_size                           hi
          |   |<---used--->|<----------available----------->|
          |   |            <--to pad-->|<---to write--->    |
238
          0   buf                   position
239 240 241

        */
        memset(self->buf + self->string_size, '\0',
Martin v. Löwis's avatar
Martin v. Löwis committed
242
               (self->pos - self->string_size) * sizeof(Py_UCS4));
243 244 245 246
    }

    /* Copy the data to the internal buffer, overwriting some of the
       existing data if self->pos < self->string_size. */
Martin v. Löwis's avatar
Martin v. Löwis committed
247 248 249 250 251
    if (!PyUnicode_AsUCS4(decoded,
                          self->buf + self->pos,
                          self->buf_size - self->pos,
                          0))
        goto fail;
252

253
success:
254
    /* Set the new length of the internal string if it has changed. */
Martin v. Löwis's avatar
Martin v. Löwis committed
255 256
    self->pos += len;
    if (self->string_size < self->pos)
257 258
        self->string_size = self->pos;

259 260 261 262 263 264
    Py_DECREF(decoded);
    return 0;

fail:
    Py_XDECREF(decoded);
    return -1;
265 266
}

267 268 269
PyDoc_STRVAR(stringio_getvalue_doc,
    "Retrieve the entire contents of the object.");

270
static PyObject *
271
stringio_getvalue(stringio *self)
272
{
273 274
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);
275 276
    if (self->state == STATE_ACCUMULATING)
        return make_intermediate(self);
Martin v. Löwis's avatar
Martin v. Löwis committed
277 278
    return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf,
                                     self->string_size);
279 280
}

281 282 283
PyDoc_STRVAR(stringio_tell_doc,
    "Tell the current file position.");

284
static PyObject *
285
stringio_tell(stringio *self)
286
{
287 288
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);
289 290 291
    return PyLong_FromSsize_t(self->pos);
}

292 293 294 295 296 297
PyDoc_STRVAR(stringio_read_doc,
    "Read at most n characters, returned as a string.\n"
    "\n"
    "If the argument is negative or omitted, read until EOF\n"
    "is reached. Return an empty string at EOF.\n");

298
static PyObject *
299
stringio_read(stringio *self, PyObject *args)
300 301
{
    Py_ssize_t size, n;
Martin v. Löwis's avatar
Martin v. Löwis committed
302
    Py_UCS4 *output;
303 304
    PyObject *arg = Py_None;

305
    CHECK_INITIALIZED(self);
306 307
    if (!PyArg_ParseTuple(args, "|O:read", &arg))
        return NULL;
308
    CHECK_CLOSED(self);
309

310 311
    if (PyNumber_Check(arg)) {
        size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
312 313
        if (size == -1 && PyErr_Occurred())
            return NULL;
314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332
    }
    else if (arg == Py_None) {
        /* Read until EOF is reached, by default. */
        size = -1;
    }
    else {
        PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
                     Py_TYPE(arg)->tp_name);
        return NULL;
    }

    /* adjust invalid sizes */
    n = self->string_size - self->pos;
    if (size < 0 || size > n) {
        size = n;
        if (size < 0)
            size = 0;
    }

333 334 335 336 337 338 339 340
    /* Optimization for seek(0); read() */
    if (self->state == STATE_ACCUMULATING && self->pos == 0 && size == n) {
        PyObject *result = make_intermediate(self);
        self->pos = self->string_size;
        return result;
    }

    ENSURE_REALIZED(self);
341 342
    output = self->buf + self->pos;
    self->pos += size;
Martin v. Löwis's avatar
Martin v. Löwis committed
343
    return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size);
344 345
}

346 347
/* Internal helper, used by stringio_readline and stringio_iternext */
static PyObject *
348
_stringio_readline(stringio *self, Py_ssize_t limit)
349
{
Martin v. Löwis's avatar
Martin v. Löwis committed
350
    Py_UCS4 *start, *end, old_char;
351 352 353 354
    Py_ssize_t len, consumed;

    /* In case of overseek, return the empty string */
    if (self->pos >= self->string_size)
Martin v. Löwis's avatar
Martin v. Löwis committed
355
        return PyUnicode_New(0, 0);
356 357 358 359 360 361 362 363 364 365

    start = self->buf + self->pos;
    if (limit < 0 || limit > self->string_size - self->pos)
        limit = self->string_size - self->pos;

    end = start + limit;
    old_char = *end;
    *end = '\0';
    len = _PyIO_find_line_ending(
        self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwis's avatar
Martin v. Löwis committed
366
        PyUnicode_4BYTE_KIND, (char*)start, (char*)end, &consumed);
367 368 369 370 371 372
    *end = old_char;
    /* If we haven't found any line ending, we just return everything
       (`consumed` is ignored). */
    if (len < 0)
        len = limit;
    self->pos += len;
Martin v. Löwis's avatar
Martin v. Löwis committed
373
    return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, start, len);
374 375 376 377 378 379 380 381
}

PyDoc_STRVAR(stringio_readline_doc,
    "Read until newline or EOF.\n"
    "\n"
    "Returns an empty string if EOF is hit immediately.\n");

static PyObject *
382
stringio_readline(stringio *self, PyObject *args)
383 384 385 386 387 388 389 390
{
    PyObject *arg = Py_None;
    Py_ssize_t limit = -1;

    CHECK_INITIALIZED(self);
    if (!PyArg_ParseTuple(args, "|O:readline", &arg))
        return NULL;
    CHECK_CLOSED(self);
391
    ENSURE_REALIZED(self);
392 393 394 395 396 397 398 399 400 401 402 403 404 405 406

    if (PyNumber_Check(arg)) {
        limit = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
        if (limit == -1 && PyErr_Occurred())
            return NULL;
    }
    else if (arg != Py_None) {
        PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
                     Py_TYPE(arg)->tp_name);
        return NULL;
    }
    return _stringio_readline(self, limit);
}

static PyObject *
407
stringio_iternext(stringio *self)
408 409 410 411 412
{
    PyObject *line;

    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);
413
    ENSURE_REALIZED(self);
414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434

    if (Py_TYPE(self) == &PyStringIO_Type) {
        /* Skip method call overhead for speed */
        line = _stringio_readline(self, -1);
    }
    else {
        /* XXX is subclassing StringIO really supported? */
        line = PyObject_CallMethodObjArgs((PyObject *)self,
                                           _PyIO_str_readline, NULL);
        if (line && !PyUnicode_Check(line)) {
            PyErr_Format(PyExc_IOError,
                         "readline() should have returned an str object, "
                         "not '%.200s'", Py_TYPE(line)->tp_name);
            Py_DECREF(line);
            return NULL;
        }
    }

    if (line == NULL)
        return NULL;

435
    if (PyUnicode_GET_LENGTH(line) == 0) {
436 437 438 439 440 441 442 443 444 445 446 447
        /* Reached EOF */
        Py_DECREF(line);
        return NULL;
    }

    return line;
}

PyDoc_STRVAR(stringio_truncate_doc,
    "Truncate size to pos.\n"
    "\n"
    "The pos argument defaults to the current file position, as\n"
448
    "returned by tell().  The current file position is unchanged.\n"
449 450
    "Returns the new absolute position.\n");

451
static PyObject *
452
stringio_truncate(stringio *self, PyObject *args)
453 454 455 456
{
    Py_ssize_t size;
    PyObject *arg = Py_None;

457
    CHECK_INITIALIZED(self);
458 459
    if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
        return NULL;
460
    CHECK_CLOSED(self);
461

462 463
    if (PyNumber_Check(arg)) {
        size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
464 465
        if (size == -1 && PyErr_Occurred())
            return NULL;
466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483
    }
    else if (arg == Py_None) {
        /* Truncate to current position if no argument is passed. */
        size = self->pos;
    }
    else {
        PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
                     Py_TYPE(arg)->tp_name);
        return NULL;
    }

    if (size < 0) {
        PyErr_Format(PyExc_ValueError,
                     "Negative size value %zd", size);
        return NULL;
    }

    if (size < self->string_size) {
484
        ENSURE_REALIZED(self);
485 486
        if (resize_buffer(self, size) < 0)
            return NULL;
487
        self->string_size = size;
488 489 490 491 492
    }

    return PyLong_FromSsize_t(size);
}

493 494 495 496 497 498 499 500 501
PyDoc_STRVAR(stringio_seek_doc,
    "Change stream position.\n"
    "\n"
    "Seek to character offset pos relative to position indicated by whence:\n"
    "    0  Start of stream (the default).  pos should be >= 0;\n"
    "    1  Current position - pos must be 0;\n"
    "    2  End of stream - pos must be 0.\n"
    "Returns the new absolute position.\n");

502
static PyObject *
503
stringio_seek(stringio *self, PyObject *args)
504 505 506 507
{
    Py_ssize_t pos;
    int mode = 0;

508
    CHECK_INITIALIZED(self);
509 510
    if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode))
        return NULL;
511
    CHECK_CLOSED(self);
512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543

    if (mode != 0 && mode != 1 && mode != 2) {
        PyErr_Format(PyExc_ValueError,
                     "Invalid whence (%i, should be 0, 1 or 2)", mode);
        return NULL;
    }
    else if (pos < 0 && mode == 0) {
        PyErr_Format(PyExc_ValueError,
                     "Negative seek position %zd", pos);
        return NULL;
    }
    else if (mode != 0 && pos != 0) {
        PyErr_SetString(PyExc_IOError,
                        "Can't do nonzero cur-relative seeks");
        return NULL;
    }

    /* mode 0: offset relative to beginning of the string.
       mode 1: no change to current position.
       mode 2: change position to end of file. */
    if (mode == 1) {
        pos = self->pos;
    }
    else if (mode == 2) {
        pos = self->string_size;
    }

    self->pos = pos;

    return PyLong_FromSsize_t(self->pos);
}

544 545 546 547 548 549
PyDoc_STRVAR(stringio_write_doc,
    "Write string to file.\n"
    "\n"
    "Returns the number of characters written, which is always equal to\n"
    "the length of the string.\n");

550
static PyObject *
551
stringio_write(stringio *self, PyObject *obj)
552 553 554
{
    Py_ssize_t size;

555 556
    CHECK_INITIALIZED(self);
    if (!PyUnicode_Check(obj)) {
557 558 559 560
        PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'",
                     Py_TYPE(obj)->tp_name);
        return NULL;
    }
Martin v. Löwis's avatar
Martin v. Löwis committed
561 562
    if (PyUnicode_READY(obj))
        return NULL;
563
    CHECK_CLOSED(self);
Martin v. Löwis's avatar
Martin v. Löwis committed
564
    size = PyUnicode_GET_LENGTH(obj);
565

566 567 568 569 570 571 572 573 574 575 576 577 578
    if (size > 0 && write_str(self, obj) < 0)
        return NULL;

    return PyLong_FromSsize_t(size);
}

PyDoc_STRVAR(stringio_close_doc,
    "Close the IO object. Attempting any further operation after the\n"
    "object is closed will raise a ValueError.\n"
    "\n"
    "This method has no effect if the file is already closed.\n");

static PyObject *
579
stringio_close(stringio *self)
580 581 582 583 584
{
    self->closed = 1;
    /* Free up some memory */
    if (resize_buffer(self, 0) < 0)
        return NULL;
585
    _PyAccu_Destroy(&self->accu);
586 587 588 589 590 591 592
    Py_CLEAR(self->readnl);
    Py_CLEAR(self->writenl);
    Py_CLEAR(self->decoder);
    Py_RETURN_NONE;
}

static int
593
stringio_traverse(stringio *self, visitproc visit, void *arg)
594 595 596 597
{
    Py_VISIT(self->dict);
    return 0;
}
598

599
static int
600
stringio_clear(stringio *self)
601 602 603
{
    Py_CLEAR(self->dict);
    return 0;
604 605 606
}

static void
607
stringio_dealloc(stringio *self)
608
{
609
    _PyObject_GC_UNTRACK(self);
610 611 612 613 614
    self->ok = 0;
    if (self->buf) {
        PyMem_Free(self->buf);
        self->buf = NULL;
    }
615
    _PyAccu_Destroy(&self->accu);
616 617 618
    Py_CLEAR(self->readnl);
    Py_CLEAR(self->writenl);
    Py_CLEAR(self->decoder);
619
    Py_CLEAR(self->dict);
620 621
    if (self->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *) self);
622 623 624 625 626 627
    Py_TYPE(self)->tp_free(self);
}

/* Allocator: create a zero-filled instance and give it an (empty) buffer
   so that self->buf is never NULL for a live object. */
static PyObject *
stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    stringio *self;

    assert(type != NULL && type->tp_alloc != NULL);

    /* tp_alloc zero-fills the instance, so only buf needs setting up. */
    self = (stringio *)type->tp_alloc(type, 0);
    if (self != NULL) {
        self->buf = (Py_UCS4 *)PyMem_Malloc(0);
        if (self->buf == NULL) {
            Py_DECREF(self);
            return PyErr_NoMemory();
        }
    }

    return (PyObject *)self;
}

647
static int
648
stringio_init(stringio *self, PyObject *args, PyObject *kwds)
649 650 651
{
    char *kwlist[] = {"initial_value", "newline", NULL};
    PyObject *value = NULL;
652
    PyObject *newline_obj = NULL;
653
    char *newline = "\n";
654
    Py_ssize_t value_len;
655

656 657
    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OO:__init__", kwlist,
                                     &value, &newline_obj))
658 659
        return -1;

660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678
    /* Parse the newline argument. This used to be done with the 'z'
       specifier, however this allowed any object with the buffer interface to
       be converted. Thus we have to parse it manually since we only want to
       allow unicode objects or None. */
    if (newline_obj == Py_None) {
        newline = NULL;
    }
    else if (newline_obj) {
        if (!PyUnicode_Check(newline_obj)) {
            PyErr_Format(PyExc_TypeError,
                         "newline must be str or None, not %.200s",
                         Py_TYPE(newline_obj)->tp_name);
            return -1;
        }
        newline = _PyUnicode_AsString(newline_obj);
        if (newline == NULL)
            return -1;
    }

679 680 681 682 683
    if (newline && newline[0] != '\0'
        && !(newline[0] == '\n' && newline[1] == '\0')
        && !(newline[0] == '\r' && newline[1] == '\0')
        && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
        PyErr_Format(PyExc_ValueError,
684
                     "illegal newline value: %R", newline_obj);
685 686 687
        return -1;
    }
    if (value && value != Py_None && !PyUnicode_Check(value)) {
688
        PyErr_Format(PyExc_TypeError,
689 690 691 692 693 694 695
                     "initial_value must be str or None, not %.200s",
                     Py_TYPE(value)->tp_name);
        return -1;
    }

    self->ok = 0;

696
    _PyAccu_Destroy(&self->accu);
697 698 699 700
    Py_CLEAR(self->readnl);
    Py_CLEAR(self->writenl);
    Py_CLEAR(self->decoder);

701 702 703
    assert((newline != NULL && newline_obj != Py_None) ||
           (newline == NULL && newline_obj == Py_None));

704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732
    if (newline) {
        self->readnl = PyUnicode_FromString(newline);
        if (self->readnl == NULL)
            return -1;
    }
    self->readuniversal = (newline == NULL || newline[0] == '\0');
    self->readtranslate = (newline == NULL);
    /* If newline == "", we don't translate anything.
       If newline == "\n" or newline == None, we translate to "\n", which is
       a no-op.
       (for newline == None, TextIOWrapper translates to os.sepline, but it
       is pointless for StringIO)
    */
    if (newline != NULL && newline[0] == '\r') {
        self->writenl = self->readnl;
        Py_INCREF(self->writenl);
    }

    if (self->readuniversal) {
        self->decoder = PyObject_CallFunction(
            (PyObject *)&PyIncrementalNewlineDecoder_Type,
            "Oi", Py_None, (int) self->readtranslate);
        if (self->decoder == NULL)
            return -1;
    }

    /* Now everything is set up, resize buffer to size of initial value,
       and copy it */
    self->string_size = 0;
733
    if (value && value != Py_None)
734
        value_len = PyUnicode_GetLength(value);
735 736 737
    else
        value_len = 0;
    if (value_len > 0) {
738 739
        /* This is a heuristic, for newline translation might change
           the string length. */
740
        if (resize_buffer(self, 0) < 0)
741
            return -1;
742
        self->state = STATE_REALIZED;
743 744 745 746 747
        self->pos = 0;
        if (write_str(self, value) < 0)
            return -1;
    }
    else {
748
        /* Empty stringio object, we can start by accumulating */
749 750
        if (resize_buffer(self, 0) < 0)
            return -1;
751 752 753
        if (_PyAccu_Init(&self->accu))
            return -1;
        self->state = STATE_ACCUMULATING;
754 755 756 757 758 759 760 761 762
    }
    self->pos = 0;

    self->closed = 0;
    self->ok = 1;
    return 0;
}

/* Properties and pseudo-properties */
763 764 765 766 767 768 769 770 771 772

/* Docstring attached to the "readable" entry of stringio_methods. */
PyDoc_STRVAR(stringio_readable_doc,
"readable() -> bool. Returns True if the IO object can be read.");

/* Docstring attached to the "writable" entry of stringio_methods. */
PyDoc_STRVAR(stringio_writable_doc,
"writable() -> bool. Returns True if the IO object can be written.");

/* Docstring attached to the "seekable" entry of stringio_methods. */
PyDoc_STRVAR(stringio_seekable_doc,
"seekable() -> bool. Returns True if the IO object can be seeked.");

773
static PyObject *
774
stringio_seekable(stringio *self, PyObject *args)
775 776
{
    CHECK_INITIALIZED(self);
777
    CHECK_CLOSED(self);
778 779 780 781
    Py_RETURN_TRUE;
}

static PyObject *
782
stringio_readable(stringio *self, PyObject *args)
783 784
{
    CHECK_INITIALIZED(self);
785
    CHECK_CLOSED(self);
786 787 788 789
    Py_RETURN_TRUE;
}

static PyObject *
790
stringio_writable(stringio *self, PyObject *args)
791 792
{
    CHECK_INITIALIZED(self);
793
    CHECK_CLOSED(self);
794 795 796
    Py_RETURN_TRUE;
}

797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872
/* Pickling support.

   The implementation of __getstate__ is similar to the one for BytesIO,
   except that we also save the newline parameter. For __setstate__ and unlike
   BytesIO, we call __init__ to restore the object's state. Doing so allows us
   to avoid decoding the complex newline state while keeping the object
   representation compact.

   See comment in bytesio.c regarding why only pickle protocol 2 and onward
   are supported.
*/

/* __getstate__(): build the pickling state as the 4-tuple
   (contents, newline argument or None, position, copy of __dict__ or None).

   Returns a new reference, or NULL with an exception set. */
static PyObject *
stringio_getstate(stringio *self)
{
    PyObject *initvalue = stringio_getvalue(self);
    PyObject *dict;
    PyObject *state;

    if (initvalue == NULL)
        return NULL;
    if (self->dict == NULL) {
        Py_INCREF(Py_None);
        dict = Py_None;
    }
    else {
        dict = PyDict_Copy(self->dict);
        if (dict == NULL) {
            /* Do not leak the contents string on failure. */
            Py_DECREF(initvalue);
            return NULL;
        }
    }

    /* "N" hands our reference to `dict` over to the tuple, whereas "O"
       takes its own reference to `initvalue`, which we release below. */
    state = Py_BuildValue("(OOnN)", initvalue,
                          self->readnl ? self->readnl : Py_None,
                          self->pos, dict);
    Py_DECREF(initvalue);
    return state;
}

/* __setstate__(state): restore the object from a state tuple of the form
   (contents, newline, position, dict-or-None).  The first two items are
   passed to __init__; the contents are then copied verbatim into the
   buffer, and the position and instance dictionary are restored. */
static PyObject *
stringio_setstate(stringio *self, PyObject *state)
{
    PyObject *init_args;
    PyObject *value_item, *pos_item, *dict_item;
    Py_UCS4 *chars;
    Py_ssize_t nchars, pos;
    int init_status;

    assert(state != NULL);
    CHECK_CLOSED(self);

    /* The state tuple may be longer than 4 items, so that the object's
       state can be extended some day without breaking
       backward-compatibility. */
    if (!PyTuple_Check(state) || Py_SIZE(state) < 4) {
        PyErr_Format(PyExc_TypeError,
                     "%.200s.__setstate__ argument should be 4-tuple, got %.200s",
                     Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
        return NULL;
    }

    /* Initialize the object's state. */
    init_args = PyTuple_GetSlice(state, 0, 2);
    if (init_args == NULL)
        return NULL;
    init_status = stringio_init(self, init_args, NULL);
    Py_DECREF(init_args);
    if (init_status < 0)
        return NULL;

    /* Restore the buffer state. Even though __init__ did initialize the
       buffer, we have to fill it again since __init__ may translate the
       newlines in the initial_value string. We clearly do not want that
       because the string value in the state tuple has already been
       translated once by __init__. So we take no chances and replace the
       object's buffer contents completely. */
    value_item = PyTuple_GET_ITEM(state, 0);
    chars = PyUnicode_AsUCS4Copy(value_item);
    if (chars == NULL)
        return NULL;
    nchars = PyUnicode_GET_LENGTH(value_item);

    if (resize_buffer(self, nchars) < 0) {
        PyMem_Free(chars);
        return NULL;
    }
    memcpy(self->buf, chars, nchars * sizeof(Py_UCS4));
    PyMem_Free(chars);
    self->string_size = nchars;

    /* Set the position value carefully. Alternatively, we could use the
       seek method instead of modifying self->pos directly to better protect
       the object's internal state against erroneous (or malicious) inputs. */
    pos_item = PyTuple_GET_ITEM(state, 2);
    if (!PyLong_Check(pos_item)) {
        PyErr_Format(PyExc_TypeError,
                     "third item of state must be an integer, got %.200s",
                     Py_TYPE(pos_item)->tp_name);
        return NULL;
    }
    pos = PyLong_AsSsize_t(pos_item);
    if (pos == -1 && PyErr_Occurred())
        return NULL;
    if (pos < 0) {
        PyErr_SetString(PyExc_ValueError,
                        "position value cannot be negative");
        return NULL;
    }
    self->pos = pos;

    /* Set the dictionary of the instance variables. */
    dict_item = PyTuple_GET_ITEM(state, 3);
    if (dict_item == Py_None)
        Py_RETURN_NONE;

    if (!PyDict_Check(dict_item)) {
        PyErr_Format(PyExc_TypeError,
                     "fourth item of state should be a dict, got a %.200s",
                     Py_TYPE(dict_item)->tp_name);
        return NULL;
    }
    if (self->dict == NULL) {
        Py_INCREF(dict_item);
        self->dict = dict_item;
    }
    else {
        /* Alternatively, we could replace the internal dictionary
           completely. However, it seems more practical to just update it. */
        if (PyDict_Update(self->dict, dict_item) < 0)
            return NULL;
    }

    Py_RETURN_NONE;
}


937
static PyObject *
938
stringio_closed(stringio *self, void *context)
939 940 941 942 943 944
{
    CHECK_INITIALIZED(self);
    return PyBool_FromLong(self->closed);
}

static PyObject *
945
stringio_line_buffering(stringio *self, void *context)
946 947 948 949 950 951 952
{
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);
    Py_RETURN_FALSE;
}

static PyObject *
953
stringio_newlines(stringio *self, void *context)
954 955 956 957 958 959 960 961
{
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);
    if (self->decoder == NULL)
        Py_RETURN_NONE;
    return PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
}

962
static struct PyMethodDef stringio_methods[] = {
963
    {"close",    (PyCFunction)stringio_close,    METH_NOARGS,  stringio_close_doc},
964
    {"getvalue", (PyCFunction)stringio_getvalue, METH_NOARGS,  stringio_getvalue_doc},
965 966 967 968 969 970
    {"read",     (PyCFunction)stringio_read,     METH_VARARGS, stringio_read_doc},
    {"readline", (PyCFunction)stringio_readline, METH_VARARGS, stringio_readline_doc},
    {"tell",     (PyCFunction)stringio_tell,     METH_NOARGS,  stringio_tell_doc},
    {"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, stringio_truncate_doc},
    {"seek",     (PyCFunction)stringio_seek,     METH_VARARGS, stringio_seek_doc},
    {"write",    (PyCFunction)stringio_write,    METH_O,       stringio_write_doc},
971

972 973 974
    {"seekable", (PyCFunction)stringio_seekable, METH_NOARGS, stringio_seekable_doc},
    {"readable", (PyCFunction)stringio_readable, METH_NOARGS, stringio_readable_doc},
    {"writable", (PyCFunction)stringio_writable, METH_NOARGS, stringio_writable_doc},
975 976 977

    {"__getstate__", (PyCFunction)stringio_getstate, METH_NOARGS},
    {"__setstate__", (PyCFunction)stringio_setstate, METH_O},
978 979 980
    {NULL, NULL}        /* sentinel */
};

981 982 983 984 985 986 987 988 989 990
static PyGetSetDef stringio_getset[] = {
    {"closed",         (getter)stringio_closed,         NULL, NULL},
    {"newlines",       (getter)stringio_newlines,       NULL, NULL},
    /*  (following comments straight off of the original Python wrapper:)
        XXX Cruft to support the TextIOWrapper API. This would only
        be meaningful if StringIO supported the buffer attribute.
        Hopefully, a better solution, than adding these pseudo-attributes,
        will be found.
    */
    {"line_buffering", (getter)stringio_line_buffering, NULL, NULL},
991
    {NULL}
992 993 994
};

PyTypeObject PyStringIO_Type = {
995
    PyVarObject_HEAD_INIT(NULL, 0)
996
    "_io.StringIO",                            /*tp_name*/
997
    sizeof(stringio),                    /*tp_basicsize*/
998 999 1000 1001 1002
    0,                                         /*tp_itemsize*/
    (destructor)stringio_dealloc,              /*tp_dealloc*/
    0,                                         /*tp_print*/
    0,                                         /*tp_getattr*/
    0,                                         /*tp_setattr*/
1003
    0,                                         /*tp_reserved*/
1004 1005 1006 1007 1008 1009 1010 1011 1012 1013
    0,                                         /*tp_repr*/
    0,                                         /*tp_as_number*/
    0,                                         /*tp_as_sequence*/
    0,                                         /*tp_as_mapping*/
    0,                                         /*tp_hash*/
    0,                                         /*tp_call*/
    0,                                         /*tp_str*/
    0,                                         /*tp_getattro*/
    0,                                         /*tp_setattro*/
    0,                                         /*tp_as_buffer*/
1014 1015 1016 1017 1018
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
                       | Py_TPFLAGS_HAVE_GC,   /*tp_flags*/
    stringio_doc,                              /*tp_doc*/
    (traverseproc)stringio_traverse,           /*tp_traverse*/
    (inquiry)stringio_clear,                   /*tp_clear*/
1019
    0,                                         /*tp_richcompare*/
1020
    offsetof(stringio, weakreflist),            /*tp_weaklistoffset*/
1021
    0,                                         /*tp_iter*/
1022
    (iternextfunc)stringio_iternext,           /*tp_iternext*/
1023 1024
    stringio_methods,                          /*tp_methods*/
    0,                                         /*tp_members*/
1025
    stringio_getset,                           /*tp_getset*/
1026 1027 1028 1029
    0,                                         /*tp_base*/
    0,                                         /*tp_dict*/
    0,                                         /*tp_descr_get*/
    0,                                         /*tp_descr_set*/
1030
    offsetof(stringio, dict),                  /*tp_dictoffset*/
1031
    (initproc)stringio_init,                   /*tp_init*/
1032 1033 1034
    0,                                         /*tp_alloc*/
    stringio_new,                              /*tp_new*/
};