Commit d9d769fc, authored by Serhiy Storchaka

Issue #23573: Increased performance of string search operations (str.find,
str.index, str.count, the in operator, str.split, str.partition) with
arguments of different kinds (UCS1, UCS2, UCS4).
Parent: f7ef4758
...@@ -10,6 +10,10 @@ Release date: 2015-03-28 ...@@ -10,6 +10,10 @@ Release date: 2015-03-28
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #23573: Increased performance of string search operations (str.find,
str.index, str.count, the in operator, str.split, str.partition) with
arguments of different kinds (UCS1, UCS2, UCS4).
- Issue #23753: Python doesn't support anymore platforms without stat() or - Issue #23753: Python doesn't support anymore platforms without stat() or
fstat(), these functions are always required. fstat(), these functions are always required.
......
...@@ -1142,7 +1142,7 @@ bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir) ...@@ -1142,7 +1142,7 @@ bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
char byte; char byte;
Py_buffer subbuf; Py_buffer subbuf;
const char *sub; const char *sub;
Py_ssize_t sub_len; Py_ssize_t len, sub_len;
Py_ssize_t start=0, end=PY_SSIZE_T_MAX; Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Py_ssize_t res; Py_ssize_t res;
...@@ -1161,15 +1161,30 @@ bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir) ...@@ -1161,15 +1161,30 @@ bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
sub = &byte; sub = &byte;
sub_len = 1; sub_len = 1;
} }
len = PyByteArray_GET_SIZE(self);
if (dir > 0) ADJUST_INDICES(start, end, len);
res = stringlib_find_slice( if (end - start < sub_len)
PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), res = -1;
sub, sub_len, start, end); else if (sub_len == 1) {
else unsigned char needle = *sub;
res = stringlib_rfind_slice( int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), res = stringlib_fastsearch_memchr_1char(
sub, sub_len, start, end); PyByteArray_AS_STRING(self) + start, end - start,
needle, needle, mode);
if (res >= 0)
res += start;
}
else {
if (dir > 0)
res = stringlib_find_slice(
PyByteArray_AS_STRING(self), len,
sub, sub_len, start, end);
else
res = stringlib_rfind_slice(
PyByteArray_AS_STRING(self), len,
sub, sub_len, start, end);
}
if (subobj) if (subobj)
PyBuffer_Release(&subbuf); PyBuffer_Release(&subbuf);
......
...@@ -1914,7 +1914,7 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir) ...@@ -1914,7 +1914,7 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
char byte; char byte;
Py_buffer subbuf; Py_buffer subbuf;
const char *sub; const char *sub;
Py_ssize_t sub_len; Py_ssize_t len, sub_len;
Py_ssize_t start=0, end=PY_SSIZE_T_MAX; Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Py_ssize_t res; Py_ssize_t res;
...@@ -1933,15 +1933,30 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir) ...@@ -1933,15 +1933,30 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
sub = &byte; sub = &byte;
sub_len = 1; sub_len = 1;
} }
len = PyBytes_GET_SIZE(self);
if (dir > 0) ADJUST_INDICES(start, end, len);
res = stringlib_find_slice( if (end - start < sub_len)
PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), res = -1;
sub, sub_len, start, end); else if (sub_len == 1) {
else unsigned char needle = *sub;
res = stringlib_rfind_slice( int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), res = stringlib_fastsearch_memchr_1char(
sub, sub_len, start, end); PyBytes_AS_STRING(self) + start, end - start,
needle, needle, mode);
if (res >= 0)
res += start;
}
else {
if (dir > 0)
res = stringlib_find_slice(
PyBytes_AS_STRING(self), len,
sub, sub_len, start, end);
else
res = stringlib_rfind_slice(
PyBytes_AS_STRING(self), len,
sub, sub_len, start, end);
}
if (subobj) if (subobj)
PyBuffer_Release(&subbuf); PyBuffer_Release(&subbuf);
......
...@@ -36,7 +36,7 @@ ...@@ -36,7 +36,7 @@
Py_LOCAL_INLINE(Py_ssize_t) Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(fastsearch_memchr_1char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB(fastsearch_memchr_1char)(const STRINGLIB_CHAR* s, Py_ssize_t n,
STRINGLIB_CHAR ch, unsigned char needle, STRINGLIB_CHAR ch, unsigned char needle,
Py_ssize_t maxcount, int mode) int mode)
{ {
if (mode == FAST_SEARCH) { if (mode == FAST_SEARCH) {
const STRINGLIB_CHAR *ptr = s; const STRINGLIB_CHAR *ptr = s;
...@@ -115,7 +115,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, ...@@ -115,7 +115,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
if (needle != 0) if (needle != 0)
#endif #endif
return STRINGLIB(fastsearch_memchr_1char) return STRINGLIB(fastsearch_memchr_1char)
(s, n, p[0], needle, maxcount, mode); (s, n, p[0], needle, mode);
} }
if (mode == FAST_COUNT) { if (mode == FAST_COUNT) {
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
......
...@@ -11,8 +11,7 @@ STRINGLIB(find)(const STRINGLIB_CHAR* str, Py_ssize_t str_len, ...@@ -11,8 +11,7 @@ STRINGLIB(find)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
{ {
Py_ssize_t pos; Py_ssize_t pos;
if (str_len < 0) assert(str_len >= 0);
return -1;
if (sub_len == 0) if (sub_len == 0)
return offset; return offset;
...@@ -31,8 +30,7 @@ STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len, ...@@ -31,8 +30,7 @@ STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
{ {
Py_ssize_t pos; Py_ssize_t pos;
if (str_len < 0) assert(str_len >= 0);
return -1;
if (sub_len == 0) if (sub_len == 0)
return str_len + offset; return str_len + offset;
...@@ -44,27 +42,11 @@ STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len, ...@@ -44,27 +42,11 @@ STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
return pos; return pos;
} }
/* helper macro to fixup start/end slice values */
#define ADJUST_INDICES(start, end, len) \
if (end > len) \
end = len; \
else if (end < 0) { \
end += len; \
if (end < 0) \
end = 0; \
} \
if (start < 0) { \
start += len; \
if (start < 0) \
start = 0; \
}
Py_LOCAL_INLINE(Py_ssize_t) Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(find_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len, STRINGLIB(find_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t start, Py_ssize_t end) Py_ssize_t start, Py_ssize_t end)
{ {
ADJUST_INDICES(start, end, str_len);
return STRINGLIB(find)(str + start, end - start, sub, sub_len, start); return STRINGLIB(find)(str + start, end - start, sub, sub_len, start);
} }
...@@ -73,7 +55,6 @@ STRINGLIB(rfind_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len, ...@@ -73,7 +55,6 @@ STRINGLIB(rfind_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t start, Py_ssize_t end) Py_ssize_t start, Py_ssize_t end)
{ {
ADJUST_INDICES(start, end, str_len);
return STRINGLIB(rfind)(str + start, end - start, sub, sub_len, start); return STRINGLIB(rfind)(str + start, end - start, sub, sub_len, start);
} }
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment