Kaydet (Commit) e71d574a authored tarafından Antoine Pitrou's avatar Antoine Pitrou

Migrate str.expandtabs to the new API

üst 310638ea
...@@ -10190,87 +10190,78 @@ If tabsize is not given, a tab size of 8 characters is assumed."); ...@@ -10190,87 +10190,78 @@ If tabsize is not given, a tab size of 8 characters is assumed.");
static PyObject* static PyObject*
unicode_expandtabs(PyUnicodeObject *self, PyObject *args) unicode_expandtabs(PyUnicodeObject *self, PyObject *args)
{ {
Py_UNICODE *e; Py_ssize_t i, j, line_pos, src_len, incr;
Py_UNICODE *p; Py_UCS4 ch;
Py_UNICODE *q; PyObject *u;
Py_UNICODE *qe; void *src_data, *dest_data;
Py_ssize_t i, j, incr, wstr_length;
PyUnicodeObject *u;
int tabsize = 8; int tabsize = 8;
int kind;
if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize)) if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
return NULL; return NULL;
if (PyUnicode_AsUnicodeAndSize((PyObject *)self, &wstr_length) == NULL)
return NULL;
/* First pass: determine size of output string */ /* First pass: determine size of output string */
i = 0; /* chars up to and including most recent \n or \r */ src_len = PyUnicode_GET_LENGTH(self);
j = 0; /* chars since most recent \n or \r (use in tab calculations) */ i = j = line_pos = 0;
e = _PyUnicode_WSTR(self) + wstr_length; /* end of input */ kind = PyUnicode_KIND(self);
for (p = _PyUnicode_WSTR(self); p < e; p++) src_data = PyUnicode_DATA(self);
if (*p == '\t') { for (; i < src_len; i++) {
ch = PyUnicode_READ(kind, src_data, i);
if (ch == '\t') {
if (tabsize > 0) { if (tabsize > 0) {
incr = tabsize - (j % tabsize); /* cannot overflow */ incr = tabsize - (line_pos % tabsize); /* cannot overflow */
if (j > PY_SSIZE_T_MAX - incr) if (j > PY_SSIZE_T_MAX - incr)
goto overflow1; goto overflow;
line_pos += incr;
j += incr; j += incr;
} }
} }
else { else {
if (j > PY_SSIZE_T_MAX - 1) if (j > PY_SSIZE_T_MAX - 1)
goto overflow1; goto overflow;
line_pos++;
j++; j++;
if (*p == '\n' || *p == '\r') { if (ch == '\n' || ch == '\r')
if (i > PY_SSIZE_T_MAX - j) line_pos = 0;
goto overflow1;
i += j;
j = 0;
}
} }
}
if (i > PY_SSIZE_T_MAX - j)
goto overflow1;
/* Second pass: create output string and fill it */ /* Second pass: create output string and fill it */
u = _PyUnicode_New(i + j); u = PyUnicode_New(j, PyUnicode_MAX_CHAR_VALUE(self));
if (!u) if (!u)
return NULL; return NULL;
dest_data = PyUnicode_DATA(u);
j = 0; /* same as in first pass */ i = j = line_pos = 0;
q = _PyUnicode_WSTR(u); /* next output char */
qe = _PyUnicode_WSTR(u) + PyUnicode_GET_SIZE(u); /* end of output */
for (p = _PyUnicode_WSTR(self); p < e; p++) for (; i < src_len; i++) {
if (*p == '\t') { ch = PyUnicode_READ(kind, src_data, i);
if (ch == '\t') {
if (tabsize > 0) { if (tabsize > 0) {
i = tabsize - (j % tabsize); incr = tabsize - (line_pos % tabsize);
j += i; line_pos += incr;
while (i--) { while (incr--) {
if (q >= qe) PyUnicode_WRITE(kind, dest_data, j, ' ');
goto overflow2; j++;
*q++ = ' ';
} }
} }
} }
else { else {
if (q >= qe) line_pos++;
goto overflow2; PyUnicode_WRITE(kind, dest_data, j, ch);
*q++ = *p;
j++; j++;
if (*p == '\n' || *p == '\r') if (ch == '\n' || ch == '\r')
j = 0; line_pos = 0;
} }
}
if (_PyUnicode_READY_REPLACE(&u)) { assert (j == PyUnicode_GET_LENGTH(u));
if (PyUnicode_READY(u)) {
Py_DECREF(u); Py_DECREF(u);
return NULL; return NULL;
} }
return (PyObject*) u; return (PyObject*) u;
overflow2: overflow:
Py_DECREF(u);
overflow1:
PyErr_SetString(PyExc_OverflowError, "new string is too long"); PyErr_SetString(PyExc_OverflowError, "new string is too long");
return NULL; return NULL;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment