Commit 6467213b, authored by Antoine Pitrou

Issue #7622: Improve the split(), rsplit(), splitlines() and replace()
methods of bytes, bytearray and unicode objects by using a common
implementation based on stringlib's fast search.  Patch by Florent Xicluna.
Parent: d0ff51c4
...@@ -563,6 +563,7 @@ STRINGLIB_HEADERS= \ ...@@ -563,6 +563,7 @@ STRINGLIB_HEADERS= \
$(srcdir)/Objects/stringlib/find.h \ $(srcdir)/Objects/stringlib/find.h \
$(srcdir)/Objects/stringlib/formatter.h \ $(srcdir)/Objects/stringlib/formatter.h \
$(srcdir)/Objects/stringlib/partition.h \ $(srcdir)/Objects/stringlib/partition.h \
$(srcdir)/Objects/stringlib/split.h \
$(srcdir)/Objects/stringlib/stringdefs.h \ $(srcdir)/Objects/stringlib/stringdefs.h \
$(srcdir)/Objects/stringlib/string_format.h \ $(srcdir)/Objects/stringlib/string_format.h \
$(srcdir)/Objects/stringlib/transmogrify.h \ $(srcdir)/Objects/stringlib/transmogrify.h \
......
...@@ -12,6 +12,10 @@ What's New in Python 2.7 alpha 3? ...@@ -12,6 +12,10 @@ What's New in Python 2.7 alpha 3?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #7622: Improve the split(), rsplit(), splitlines() and replace()
methods of bytes, bytearray and unicode objects by using a common
implementation based on stringlib's fast search. Patch by Florent Xicluna.
- Issue #7632: Fix a crash in dtoa.c that occurred in debug builds - Issue #7632: Fix a crash in dtoa.c that occurred in debug builds
when parsing certain long numeric strings corresponding to subnormal when parsing certain long numeric strings corresponding to subnormal
values. Also fix a number of bugs in dtoa.c that could lead to values. Also fix a number of bugs in dtoa.c that could lead to
......
This diff is collapsed.
...@@ -28,3 +28,12 @@ STRINGLIB_CHAR* STRINGLIB_STR(PyObject*) ...@@ -28,3 +28,12 @@ STRINGLIB_CHAR* STRINGLIB_STR(PyObject*)
returns the pointer to the character data for the given string returns the pointer to the character data for the given string
object (which must be of the right type) object (which must be of the right type)
int STRINGLIB_CHECK_EXACT(PyObject *)
returns true if the object is an instance of our type, not a subclass.
STRINGLIB_MUTABLE
Must be 0 or 1 to tell the cpp macros in stringlib code if the object
being operated on is mutable or not.
...@@ -9,28 +9,22 @@ ...@@ -9,28 +9,22 @@
Py_LOCAL_INLINE(Py_ssize_t) Py_LOCAL_INLINE(Py_ssize_t)
stringlib_count(const STRINGLIB_CHAR* str, Py_ssize_t str_len, stringlib_count(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len) const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t maxcount)
{ {
Py_ssize_t count; Py_ssize_t count;
if (str_len < 0) if (str_len < 0)
return 0; /* start > len(str) */ return 0; /* start > len(str) */
if (sub_len == 0) if (sub_len == 0)
return str_len + 1; return (str_len < maxcount) ? str_len + 1 : maxcount;
count = fastsearch(str, str_len, sub, sub_len, FAST_COUNT); count = fastsearch(str, str_len, sub, sub_len, maxcount, FAST_COUNT);
if (count < 0) if (count < 0)
count = 0; /* no match */ return 0; /* no match */
return count; return count;
} }
#endif #endif
/*
Local variables:
c-basic-offset: 4
indent-tabs-mode: nil
End:
*/
...@@ -107,4 +107,3 @@ stringlib_swapcase(PyObject *self) ...@@ -107,4 +107,3 @@ stringlib_swapcase(PyObject *self)
STRINGLIB_LEN(self)); STRINGLIB_LEN(self));
return newobj; return newobj;
} }
...@@ -18,10 +18,13 @@ ...@@ -18,10 +18,13 @@
#define FAST_SEARCH 1 #define FAST_SEARCH 1
#define FAST_RSEARCH 2 #define FAST_RSEARCH 2
/* Bloom-filter helpers for the compressed Boyer-Moore delta-1 table built
   by fastsearch().  A character is folded onto one of LONG_BIT bit
   positions of `mask`.

   BLOOM_ADD(mask, ch) sets the bit for `ch` in `mask` (mask must be an
   lvalue); BLOOM(mask, ch) is non-zero when that bit is set.

   The shifted constant is `1UL`, not plain `1`: the shift count can be as
   large as LONG_BIT - 1, and shifting an int by its width or more (or into
   its sign bit) is undefined behaviour. */
#define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (LONG_BIT - 1)))))
#define BLOOM(mask, ch)     ((mask & (1UL << ((ch) & (LONG_BIT - 1)))))
Py_LOCAL_INLINE(Py_ssize_t) Py_LOCAL_INLINE(Py_ssize_t)
fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
const STRINGLIB_CHAR* p, Py_ssize_t m, const STRINGLIB_CHAR* p, Py_ssize_t m,
int mode) Py_ssize_t maxcount, int mode)
{ {
long mask; long mask;
Py_ssize_t skip, count = 0; Py_ssize_t skip, count = 0;
...@@ -29,7 +32,7 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, ...@@ -29,7 +32,7 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
w = n - m; w = n - m;
if (w < 0) if (w < 0 || (mode == FAST_COUNT && maxcount == 0))
return -1; return -1;
/* look for special cases */ /* look for special cases */
...@@ -39,8 +42,11 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, ...@@ -39,8 +42,11 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
/* use special case for 1-character strings */ /* use special case for 1-character strings */
if (mode == FAST_COUNT) { if (mode == FAST_COUNT) {
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
if (s[i] == p[0]) if (s[i] == p[0]) {
count++; count++;
if (count == maxcount)
return maxcount;
}
return count; return count;
} else if (mode == FAST_SEARCH) { } else if (mode == FAST_SEARCH) {
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
...@@ -56,19 +62,20 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, ...@@ -56,19 +62,20 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
mlast = m - 1; mlast = m - 1;
skip = mlast - 1; skip = mlast - 1;
mask = 0;
if (mode != FAST_RSEARCH) { if (mode != FAST_RSEARCH) {
/* create compressed boyer-moore delta 1 table */ /* create compressed boyer-moore delta 1 table */
/* process pattern[:-1] */ /* process pattern[:-1] */
for (mask = i = 0; i < mlast; i++) { for (i = 0; i < mlast; i++) {
mask |= (1 << (p[i] & 0x1F)); BLOOM_ADD(mask, p[i]);
if (p[i] == p[mlast]) if (p[i] == p[mlast])
skip = mlast - i - 1; skip = mlast - i - 1;
} }
/* process pattern[-1] outside the loop */ /* process pattern[-1] outside the loop */
mask |= (1 << (p[mlast] & 0x1F)); BLOOM_ADD(mask, p[mlast]);
for (i = 0; i <= w; i++) { for (i = 0; i <= w; i++) {
/* note: using mlast in the skip path slows things down on x86 */ /* note: using mlast in the skip path slows things down on x86 */
...@@ -82,17 +89,19 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, ...@@ -82,17 +89,19 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
if (mode != FAST_COUNT) if (mode != FAST_COUNT)
return i; return i;
count++; count++;
if (count == maxcount)
return maxcount;
i = i + mlast; i = i + mlast;
continue; continue;
} }
/* miss: check if next character is part of pattern */ /* miss: check if next character is part of pattern */
if (!(mask & (1 << (s[i+m] & 0x1F)))) if (!BLOOM(mask, s[i+m]))
i = i + m; i = i + m;
else else
i = i + skip; i = i + skip;
} else { } else {
/* skip: check if next character is part of pattern */ /* skip: check if next character is part of pattern */
if (!(mask & (1 << (s[i+m] & 0x1F)))) if (!BLOOM(mask, s[i+m]))
i = i + m; i = i + m;
} }
} }
...@@ -101,10 +110,10 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, ...@@ -101,10 +110,10 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
/* create compressed boyer-moore delta 1 table */ /* create compressed boyer-moore delta 1 table */
/* process pattern[0] outside the loop */ /* process pattern[0] outside the loop */
mask = (1 << (p[0] & 0x1F)); BLOOM_ADD(mask, p[0]);
/* process pattern[:0:-1] */ /* process pattern[:0:-1] */
for (i = mlast; i > 0; i--) { for (i = mlast; i > 0; i--) {
mask |= (1 << (p[i] & 0x1F)); BLOOM_ADD(mask, p[i]);
if (p[i] == p[0]) if (p[i] == p[0])
skip = i - 1; skip = i - 1;
} }
...@@ -119,13 +128,13 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, ...@@ -119,13 +128,13 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
/* got a match! */ /* got a match! */
return i; return i;
/* miss: check if previous character is part of pattern */ /* miss: check if previous character is part of pattern */
if (!(mask & (1 << (s[i-1] & 0x1F)))) if (!BLOOM(mask, s[i-1]))
i = i - m; i = i - m;
else else
i = i - skip; i = i - skip;
} else { } else {
/* skip: check if previous character is part of pattern */ /* skip: check if previous character is part of pattern */
if (!(mask & (1 << (s[i-1] & 0x1F)))) if (!BLOOM(mask, s[i-1]))
i = i - m; i = i - m;
} }
} }
...@@ -137,10 +146,3 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, ...@@ -137,10 +146,3 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
} }
#endif #endif
/*
Local variables:
c-basic-offset: 4
indent-tabs-mode: nil
End:
*/
...@@ -19,7 +19,7 @@ stringlib_find(const STRINGLIB_CHAR* str, Py_ssize_t str_len, ...@@ -19,7 +19,7 @@ stringlib_find(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
if (sub_len == 0) if (sub_len == 0)
return offset; return offset;
pos = fastsearch(str, str_len, sub, sub_len, FAST_SEARCH); pos = fastsearch(str, str_len, sub, sub_len, -1, FAST_SEARCH);
if (pos >= 0) if (pos >= 0)
pos += offset; pos += offset;
...@@ -39,7 +39,7 @@ stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len, ...@@ -39,7 +39,7 @@ stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
if (sub_len == 0) if (sub_len == 0)
return str_len + offset; return str_len + offset;
pos = fastsearch(str, str_len, sub, sub_len, FAST_RSEARCH); pos = fastsearch(str, str_len, sub, sub_len, -1, FAST_RSEARCH);
if (pos >= 0) if (pos >= 0)
pos += offset; pos += offset;
...@@ -47,22 +47,27 @@ stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len, ...@@ -47,22 +47,27 @@ stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
return pos; return pos;
} }
/* Helper macro to fix up start/end slice values in place.

   `start` and `end` must be modifiable lvalues; `len` is the length of the
   sequence being sliced.
     - end greater than len is cut back to len;
     - negative end / start count from the end of the sequence and are
       floored at 0;
     - start greater than len is left unchanged.

   The body is wrapped in do { ... } while (0) so the macro expands to a
   single statement and stays correct when used as the body of an unbraced
   if/else. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Py_LOCAL_INLINE(Py_ssize_t) Py_LOCAL_INLINE(Py_ssize_t)
stringlib_find_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len, stringlib_find_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t start, Py_ssize_t end) Py_ssize_t start, Py_ssize_t end)
{ {
if (start < 0) ADJUST_INDICES(start, end, str_len);
start += str_len;
if (start < 0)
start = 0;
if (end > str_len)
end = str_len;
if (end < 0)
end += str_len;
if (end < 0)
end = 0;
return stringlib_find(str + start, end - start, sub, sub_len, start); return stringlib_find(str + start, end - start, sub, sub_len, start);
} }
...@@ -71,21 +76,11 @@ stringlib_rfind_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len, ...@@ -71,21 +76,11 @@ stringlib_rfind_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t start, Py_ssize_t end) Py_ssize_t start, Py_ssize_t end)
{ {
if (start < 0) ADJUST_INDICES(start, end, str_len);
start += str_len;
if (start < 0)
start = 0;
if (end > str_len)
end = str_len;
if (end < 0)
end += str_len;
if (end < 0)
end = 0;
return stringlib_rfind(str + start, end - start, sub, sub_len, start); return stringlib_rfind(str + start, end - start, sub, sub_len, start);
} }
#if defined(STRINGLIB_STR) && !defined(FROM_BYTEARRAY) #ifdef STRINGLIB_WANT_CONTAINS_OBJ
Py_LOCAL_INLINE(int) Py_LOCAL_INLINE(int)
stringlib_contains_obj(PyObject* str, PyObject* sub) stringlib_contains_obj(PyObject* str, PyObject* sub)
...@@ -96,9 +91,9 @@ stringlib_contains_obj(PyObject* str, PyObject* sub) ...@@ -96,9 +91,9 @@ stringlib_contains_obj(PyObject* str, PyObject* sub)
) != -1; ) != -1;
} }
#endif /* STRINGLIB_STR */ #endif /* STRINGLIB_WANT_CONTAINS_OBJ */
#ifdef FROM_UNICODE #if STRINGLIB_IS_UNICODE
/* /*
This function is a helper for the "find" family (find, rfind, index, This function is a helper for the "find" family (find, rfind, index,
...@@ -146,13 +141,6 @@ _ParseTupleFinds (PyObject *args, PyObject **substring, ...@@ -146,13 +141,6 @@ _ParseTupleFinds (PyObject *args, PyObject **substring,
return 1; return 1;
} }
#endif /* FROM_UNICODE */ #endif /* STRINGLIB_IS_UNICODE */
#endif /* STRINGLIB_FIND_H */ #endif /* STRINGLIB_FIND_H */
/*
Local variables:
c-basic-offset: 4
indent-tabs-mode: nil
End:
*/
...@@ -8,33 +8,39 @@ ...@@ -8,33 +8,39 @@
#endif #endif
Py_LOCAL_INLINE(PyObject*) Py_LOCAL_INLINE(PyObject*)
stringlib_partition( stringlib_partition(PyObject* str_obj,
PyObject* str_obj, const STRINGLIB_CHAR* str, Py_ssize_t str_len, const STRINGLIB_CHAR* str, Py_ssize_t str_len,
PyObject* sep_obj, const STRINGLIB_CHAR* sep, Py_ssize_t sep_len PyObject* sep_obj,
) const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)
{ {
PyObject* out; PyObject* out;
Py_ssize_t pos; Py_ssize_t pos;
if (sep_len == 0) { if (sep_len == 0) {
PyErr_SetString(PyExc_ValueError, "empty separator"); PyErr_SetString(PyExc_ValueError, "empty separator");
return NULL; return NULL;
} }
out = PyTuple_New(3); out = PyTuple_New(3);
if (!out) if (!out)
return NULL; return NULL;
pos = fastsearch(str, str_len, sep, sep_len, FAST_SEARCH); pos = fastsearch(str, str_len, sep, sep_len, -1, FAST_SEARCH);
if (pos < 0) { if (pos < 0) {
Py_INCREF(str_obj); #if STRINGLIB_MUTABLE
PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj); PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, str_len));
Py_INCREF(STRINGLIB_EMPTY); PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0));
PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY); PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(NULL, 0));
Py_INCREF(STRINGLIB_EMPTY); #else
PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY); Py_INCREF(str_obj);
return out; PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj);
Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY);
#endif
return out;
} }
PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos)); PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos));
...@@ -44,41 +50,47 @@ stringlib_partition( ...@@ -44,41 +50,47 @@ stringlib_partition(
PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos)); PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos));
if (PyErr_Occurred()) { if (PyErr_Occurred()) {
Py_DECREF(out); Py_DECREF(out);
return NULL; return NULL;
} }
return out; return out;
} }
Py_LOCAL_INLINE(PyObject*) Py_LOCAL_INLINE(PyObject*)
stringlib_rpartition( stringlib_rpartition(PyObject* str_obj,
PyObject* str_obj, const STRINGLIB_CHAR* str, Py_ssize_t str_len, const STRINGLIB_CHAR* str, Py_ssize_t str_len,
PyObject* sep_obj, const STRINGLIB_CHAR* sep, Py_ssize_t sep_len PyObject* sep_obj,
) const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)
{ {
PyObject* out; PyObject* out;
Py_ssize_t pos; Py_ssize_t pos;
if (sep_len == 0) { if (sep_len == 0) {
PyErr_SetString(PyExc_ValueError, "empty separator"); PyErr_SetString(PyExc_ValueError, "empty separator");
return NULL; return NULL;
} }
out = PyTuple_New(3); out = PyTuple_New(3);
if (!out) if (!out)
return NULL; return NULL;
pos = fastsearch(str, str_len, sep, sep_len, FAST_RSEARCH); pos = fastsearch(str, str_len, sep, sep_len, -1, FAST_RSEARCH);
if (pos < 0) { if (pos < 0) {
Py_INCREF(STRINGLIB_EMPTY); #if STRINGLIB_MUTABLE
PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY); PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(NULL, 0));
Py_INCREF(STRINGLIB_EMPTY); PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0));
PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY); PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str, str_len));
Py_INCREF(str_obj); #else
PyTuple_SET_ITEM(out, 2, (PyObject*) str_obj); Py_INCREF(STRINGLIB_EMPTY);
return out; PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY);
Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
Py_INCREF(str_obj);
PyTuple_SET_ITEM(out, 2, (PyObject*) str_obj);
#endif
return out;
} }
PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos)); PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos));
...@@ -88,18 +100,11 @@ stringlib_rpartition( ...@@ -88,18 +100,11 @@ stringlib_rpartition(
PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos)); PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos));
if (PyErr_Occurred()) { if (PyErr_Occurred()) {
Py_DECREF(out); Py_DECREF(out);
return NULL; return NULL;
} }
return out; return out;
} }
#endif #endif
/*
Local variables:
c-basic-offset: 4
indent-tabs-mode: nil
End:
*/
This diff is collapsed.
...@@ -11,6 +11,8 @@ ...@@ -11,6 +11,8 @@
#define STRINGLIB_TYPE_NAME "string" #define STRINGLIB_TYPE_NAME "string"
#define STRINGLIB_PARSE_CODE "S" #define STRINGLIB_PARSE_CODE "S"
#define STRINGLIB_EMPTY nullstring #define STRINGLIB_EMPTY nullstring
#define STRINGLIB_ISSPACE Py_ISSPACE
/* True when x is '\n' or '\r'.  The argument is parenthesised so that an
   expression argument (e.g. a conditional) expands correctly.  Note that x
   may be evaluated twice, so it must not have side effects. */
#define STRINGLIB_ISLINEBREAK(x) (((x) == '\n') || ((x) == '\r'))
#define STRINGLIB_ISDECIMAL(x) ((x >= '0') && (x <= '9')) #define STRINGLIB_ISDECIMAL(x) ((x >= '0') && (x <= '9'))
#define STRINGLIB_TODECIMAL(x) (STRINGLIB_ISDECIMAL(x) ? (x - '0') : -1) #define STRINGLIB_TODECIMAL(x) (STRINGLIB_ISDECIMAL(x) ? (x - '0') : -1)
#define STRINGLIB_TOUPPER Py_TOUPPER #define STRINGLIB_TOUPPER Py_TOUPPER
...@@ -21,8 +23,11 @@ ...@@ -21,8 +23,11 @@
#define STRINGLIB_NEW PyString_FromStringAndSize #define STRINGLIB_NEW PyString_FromStringAndSize
#define STRINGLIB_RESIZE _PyString_Resize #define STRINGLIB_RESIZE _PyString_Resize
#define STRINGLIB_CHECK PyString_Check #define STRINGLIB_CHECK PyString_Check
#define STRINGLIB_CHECK_EXACT PyString_CheckExact
#define STRINGLIB_TOSTR PyObject_Str #define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_GROUPING _PyString_InsertThousandsGrouping #define STRINGLIB_GROUPING _PyString_InsertThousandsGrouping
#define STRINGLIB_GROUPING_LOCALE _PyString_InsertThousandsGroupingLocale #define STRINGLIB_GROUPING_LOCALE _PyString_InsertThousandsGroupingLocale
#define STRINGLIB_WANT_CONTAINS_OBJ 1
#endif /* !STRINGLIB_STRINGDEFS_H */ #endif /* !STRINGLIB_STRINGDEFS_H */
/* NOTE: this API is -ONLY- for use with single byte character strings. */ /* NOTE: this API is -ONLY- for use with single byte character strings. */
/* Do not use it with Unicode. */ /* Do not use it with Unicode. */
#include "bytes_methods.h"
#ifndef STRINGLIB_MUTABLE
#warning "STRINGLIB_MUTABLE not defined before #include, assuming 0"
#define STRINGLIB_MUTABLE 0
#endif
/* the more complicated methods. parts of these should be pulled out into the /* the more complicated methods. parts of these should be pulled out into the
shared code in bytes_methods.c to cut down on duplicate code bloat. */ shared code in bytes_methods.c to cut down on duplicate code bloat. */
...@@ -269,87 +262,3 @@ stringlib_zfill(PyObject *self, PyObject *args) ...@@ -269,87 +262,3 @@ stringlib_zfill(PyObject *self, PyObject *args)
return (PyObject*) s; return (PyObject*) s;
} }
/* Append the slice data[left:right] to the result list.

   Relies on names from the enclosing function: the locals `str` (scratch
   PyObject*) and `list` (the list being built), and an `onError` label
   that is jumped to when creating the slice or appending it fails.  The
   reference to the new object is dropped on both the success and the
   failure path of PyList_Append.

   Wrapped in do { ... } while (0) so the expansion is a single statement
   and cannot capture a following `else`. */
#define _STRINGLIB_SPLIT_APPEND(data, left, right)      \
    do {                                                \
        str = STRINGLIB_NEW((data) + (left),            \
                            (right) - (left));          \
        if (str == NULL)                                \
            goto onError;                               \
        if (PyList_Append(list, str)) {                 \
            Py_DECREF(str);                             \
            goto onError;                               \
        }                                               \
        Py_DECREF(str);                                 \
    } while (0)
PyDoc_STRVAR(splitlines__doc__,
"B.splitlines([keepends]) -> list of lines\n\
\n\
Return a list of the lines in B, breaking at line boundaries.\n\
Line breaks are not included in the resulting list unless keepends\n\
is given and true.");

/* Split the single-byte string `self` at '\n', '\r' and "\r\n" boundaries.

   args: optional int `keepends` (parsed with "|i:splitlines"); when
         non-zero each line keeps its terminating line break.
   Returns a new list of lines, or NULL if argument parsing, list creation,
   slice creation or appending fails. */
static PyObject*
stringlib_splitlines(PyObject *self, PyObject *args)
{
    Py_ssize_t i;       /* scan position */
    Py_ssize_t j;       /* start of the line currently being collected */
    Py_ssize_t len;
    int keepends = 0;
    PyObject *list;
    PyObject *str;      /* scratch object used by _STRINGLIB_SPLIT_APPEND */
    char *data;

    if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
        return NULL;

    data = STRINGLIB_STR(self);
    len = STRINGLIB_LEN(self);

    /* This does not use the preallocated list because splitlines is
       usually run with hundreds of newlines.  The overhead of
       switching between PyList_SET_ITEM and append causes about a
       2-3% slowdown for that common case.  A smarter implementation
       could move the if check out, so the SET_ITEMs are done first
       and the appends only done when the prealloc buffer is full.
       That's too much work for little gain. */
    list = PyList_New(0);
    if (!list)
        return NULL;

    for (i = j = 0; i < len; ) {
        Py_ssize_t eol;

        /* Find a line and append it */
        while (i < len && data[i] != '\n' && data[i] != '\r')
            i++;

        /* Skip the line break reading CRLF as one line break */
        eol = i;
        if (i < len) {
            if (data[i] == '\r' && i + 1 < len && data[i+1] == '\n')
                i += 2;
            else
                i++;
            if (keepends)
                eol = i;
        }
        _STRINGLIB_SPLIT_APPEND(data, j, eol);
        j = i;
    }
    /* No trailing append is needed: every iteration ends with j == i and
       the loop only exits once i >= len, so nothing is left between j and
       len at this point. */
    return list;

onError:
    Py_XDECREF(list);
    return NULL;
}
#undef _STRINGLIB_SPLIT_APPEND
...@@ -11,6 +11,8 @@ ...@@ -11,6 +11,8 @@
#define STRINGLIB_TYPE_NAME "unicode" #define STRINGLIB_TYPE_NAME "unicode"
#define STRINGLIB_PARSE_CODE "U" #define STRINGLIB_PARSE_CODE "U"
#define STRINGLIB_EMPTY unicode_empty #define STRINGLIB_EMPTY unicode_empty
#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL #define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL
#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL #define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL
#define STRINGLIB_TOUPPER Py_UNICODE_TOUPPER #define STRINGLIB_TOUPPER Py_UNICODE_TOUPPER
...@@ -21,6 +23,7 @@ ...@@ -21,6 +23,7 @@
#define STRINGLIB_NEW PyUnicode_FromUnicode #define STRINGLIB_NEW PyUnicode_FromUnicode
#define STRINGLIB_RESIZE PyUnicode_Resize #define STRINGLIB_RESIZE PyUnicode_Resize
#define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping #define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping
#if PY_VERSION_HEX < 0x03000000 #if PY_VERSION_HEX < 0x03000000
......
This diff is collapsed.
This diff is collapsed.
...@@ -1538,6 +1538,10 @@ ...@@ -1538,6 +1538,10 @@
RelativePath="..\..\Objects\sliceobject.c" RelativePath="..\..\Objects\sliceobject.c"
> >
</File> </File>
<File
RelativePath="..\..\Objects\stringlib\split.h"
>
</File>
<File <File
RelativePath="..\..\Objects\structseq.c" RelativePath="..\..\Objects\structseq.c"
> >
......
...@@ -1538,6 +1538,10 @@ ...@@ -1538,6 +1538,10 @@
RelativePath="..\Objects\sliceobject.c" RelativePath="..\Objects\sliceobject.c"
> >
</File> </File>
<File
RelativePath="..\Objects\stringlib\split.h"
>
</File>
<File <File
RelativePath="..\Objects\structseq.c" RelativePath="..\Objects\structseq.c"
> >
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment