Kaydet (Commit) d9d769fc authored tarafından Serhiy Storchaka's avatar Serhiy Storchaka

Issue #23573: Increased performance of string search operations (str.find,

str.index, str.count, the in operator, str.split, str.partition) with
arguments of different kinds (UCS1, UCS2, UCS4).
üst f7ef4758
......@@ -10,6 +10,10 @@ Release date: 2015-03-28
Core and Builtins
-----------------
- Issue #23573: Increased performance of string search operations (str.find,
str.index, str.count, the in operator, str.split, str.partition) with
arguments of different kinds (UCS1, UCS2, UCS4).
- Issue #23753: Python doesn't support anymore platforms without stat() or
fstat(), these functions are always required.
......
......@@ -1142,7 +1142,7 @@ bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
char byte;
Py_buffer subbuf;
const char *sub;
Py_ssize_t sub_len;
Py_ssize_t len, sub_len;
Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Py_ssize_t res;
......@@ -1161,15 +1161,30 @@ bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
sub = &byte;
sub_len = 1;
}
len = PyByteArray_GET_SIZE(self);
if (dir > 0)
res = stringlib_find_slice(
PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
sub, sub_len, start, end);
else
res = stringlib_rfind_slice(
PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
sub, sub_len, start, end);
ADJUST_INDICES(start, end, len);
if (end - start < sub_len)
res = -1;
else if (sub_len == 1) {
unsigned char needle = *sub;
int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
res = stringlib_fastsearch_memchr_1char(
PyByteArray_AS_STRING(self) + start, end - start,
needle, needle, mode);
if (res >= 0)
res += start;
}
else {
if (dir > 0)
res = stringlib_find_slice(
PyByteArray_AS_STRING(self), len,
sub, sub_len, start, end);
else
res = stringlib_rfind_slice(
PyByteArray_AS_STRING(self), len,
sub, sub_len, start, end);
}
if (subobj)
PyBuffer_Release(&subbuf);
......
......@@ -1914,7 +1914,7 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
char byte;
Py_buffer subbuf;
const char *sub;
Py_ssize_t sub_len;
Py_ssize_t len, sub_len;
Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Py_ssize_t res;
......@@ -1933,15 +1933,30 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
sub = &byte;
sub_len = 1;
}
len = PyBytes_GET_SIZE(self);
if (dir > 0)
res = stringlib_find_slice(
PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
sub, sub_len, start, end);
else
res = stringlib_rfind_slice(
PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
sub, sub_len, start, end);
ADJUST_INDICES(start, end, len);
if (end - start < sub_len)
res = -1;
else if (sub_len == 1) {
unsigned char needle = *sub;
int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
res = stringlib_fastsearch_memchr_1char(
PyBytes_AS_STRING(self) + start, end - start,
needle, needle, mode);
if (res >= 0)
res += start;
}
else {
if (dir > 0)
res = stringlib_find_slice(
PyBytes_AS_STRING(self), len,
sub, sub_len, start, end);
else
res = stringlib_rfind_slice(
PyBytes_AS_STRING(self), len,
sub, sub_len, start, end);
}
if (subobj)
PyBuffer_Release(&subbuf);
......
......@@ -36,7 +36,7 @@
Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(fastsearch_memchr_1char)(const STRINGLIB_CHAR* s, Py_ssize_t n,
STRINGLIB_CHAR ch, unsigned char needle,
Py_ssize_t maxcount, int mode)
int mode)
{
if (mode == FAST_SEARCH) {
const STRINGLIB_CHAR *ptr = s;
......@@ -115,7 +115,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
if (needle != 0)
#endif
return STRINGLIB(fastsearch_memchr_1char)
(s, n, p[0], needle, maxcount, mode);
(s, n, p[0], needle, mode);
}
if (mode == FAST_COUNT) {
for (i = 0; i < n; i++)
......
......@@ -11,8 +11,7 @@ STRINGLIB(find)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
{
Py_ssize_t pos;
if (str_len < 0)
return -1;
assert(str_len >= 0);
if (sub_len == 0)
return offset;
......@@ -31,8 +30,7 @@ STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
{
Py_ssize_t pos;
if (str_len < 0)
return -1;
assert(str_len >= 0);
if (sub_len == 0)
return str_len + offset;
......@@ -44,27 +42,11 @@ STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
return pos;
}
/* helper macro to fixup start/end slice values */
#define ADJUST_INDICES(start, end, len) \
if (end > len) \
end = len; \
else if (end < 0) { \
end += len; \
if (end < 0) \
end = 0; \
} \
if (start < 0) { \
start += len; \
if (start < 0) \
start = 0; \
}
Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(find_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t start, Py_ssize_t end)
{
ADJUST_INDICES(start, end, str_len);
return STRINGLIB(find)(str + start, end - start, sub, sub_len, start);
}
......@@ -73,7 +55,6 @@ STRINGLIB(rfind_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t start, Py_ssize_t end)
{
ADJUST_INDICES(start, end, str_len);
return STRINGLIB(rfind)(str + start, end - start, sub, sub_len, start);
}
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment