Commit de02bcb2, authored by Walter Dörwald

Apply patch diff.txt from SF feature request

http://www.python.org/sf/444708

This adds the optional argument for str.strip
to unicode.strip too and makes it possible
to call str.strip with a unicode argument
and unicode.strip with a str argument.
parent a7cc43b9
...@@ -235,17 +235,28 @@ The functions defined in this module are: ...@@ -235,17 +235,28 @@ The functions defined in this module are:
\function{joinfields()} was only used with two arguments.) \function{joinfields()} was only used with two arguments.)
\end{funcdesc} \end{funcdesc}
\begin{funcdesc}{lstrip}{s} \begin{funcdesc}{lstrip}{s\optional{, chars}}
Return a copy of \var{s} but without leading whitespace characters. Return a copy of the string with leading characters removed. If
\var{chars} is omitted or \code{None}, whitespace characters are
removed. If given and not \code{None}, \var{chars} must be a string;
the characters in the string will be stripped from the beginning of
the string this method is called on.
\end{funcdesc} \end{funcdesc}
\begin{funcdesc}{rstrip}{s} \begin{funcdesc}{rstrip}{s\optional{, chars}}
Return a copy of \var{s} but without trailing whitespace Return a copy of the string with trailing characters removed. If
characters. \var{chars} is omitted or \code{None}, whitespace characters are
removed. If given and not \code{None}, \var{chars} must be a string;
the characters in the string will be stripped from the end of the
string this method is called on.
\end{funcdesc} \end{funcdesc}
\begin{funcdesc}{strip}{s} \begin{funcdesc}{strip}{s\optional{, chars}}
Return a copy of \var{s} without leading or trailing whitespace. Return a copy of the string with leading and trailing characters
removed. If \var{chars} is omitted or \code{None}, whitespace
characters are removed. If given and not \code{None}, \var{chars}
must be a string; the characters in the string will be stripped from
both ends of the string this method is called on.
\end{funcdesc} \end{funcdesc}
\begin{funcdesc}{swapcase}{s} \begin{funcdesc}{swapcase}{s}
......
...@@ -1040,6 +1040,13 @@ extern DL_IMPORT(int) PyUnicode_Contains( ...@@ -1040,6 +1040,13 @@ extern DL_IMPORT(int) PyUnicode_Contains(
PyObject *element /* Element string */ PyObject *element /* Element string */
); );
/* Externally visible for str.strip(unicode) */
/* Strip from `self` the characters contained in `sepobj`.
   `striptype` selects the side(s) to strip; the implementation compares it
   against LEFTSTRIP (0), RIGHTSTRIP (1) and BOTHSTRIP (2).
   `sepobj` is read with the PyUnicode_AS_UNICODE/PyUnicode_GET_SIZE macros,
   so callers are expected to pass a unicode object (no type check is made
   inside the function).
   Returns `self` with an added reference when nothing was stripped and
   `self` is exactly a unicode object; otherwise returns the result of
   PyUnicode_FromUnicode() on the remaining slice. */
extern DL_IMPORT(PyObject *) _PyUnicode_XStrip(
PyUnicodeObject *self,
int striptype,
PyObject *sepobj
);
/* === Characters Type APIs =============================================== */ /* === Characters Type APIs =============================================== */
/* These should not be used directly. Use the Py_UNICODE_IS* and /* These should not be used directly. Use the Py_UNICODE_IS* and
......
...@@ -169,12 +169,18 @@ def run_method_tests(test): ...@@ -169,12 +169,18 @@ def run_method_tests(test):
test('rstrip', ' hello ', ' hello', None) test('rstrip', ' hello ', ' hello', None)
test('strip', 'hello', 'hello', None) test('strip', 'hello', 'hello', None)
# strip/lstrip/rstrip with real arg # strip/lstrip/rstrip with str arg
test('strip', 'xyzzyhelloxyzzy', 'hello', 'xyz') test('strip', 'xyzzyhelloxyzzy', 'hello', 'xyz')
test('lstrip', 'xyzzyhelloxyzzy', 'helloxyzzy', 'xyz') test('lstrip', 'xyzzyhelloxyzzy', 'helloxyzzy', 'xyz')
test('rstrip', 'xyzzyhelloxyzzy', 'xyzzyhello', 'xyz') test('rstrip', 'xyzzyhelloxyzzy', 'xyzzyhello', 'xyz')
test('strip', 'hello', 'hello', 'xyz') test('strip', 'hello', 'hello', 'xyz')
# strip/lstrip/rstrip with unicode arg
test('strip', 'xyzzyhelloxyzzy', u'hello', u'xyz')
test('lstrip', 'xyzzyhelloxyzzy', u'helloxyzzy', u'xyz')
test('rstrip', 'xyzzyhelloxyzzy', u'xyzzyhello', u'xyz')
test('strip', 'hello', u'hello', u'xyz')
test('swapcase', 'HeLLo cOmpUteRs', 'hEllO CoMPuTErS') test('swapcase', 'HeLLo cOmpUteRs', 'hEllO CoMPuTErS')
test('translate', 'xyzabcdef', 'xyzxyz', transtable, 'def') test('translate', 'xyzabcdef', 'xyzxyz', transtable, 'def')
......
...@@ -169,6 +169,24 @@ test('lstrip', u' hello ', u'hello ') ...@@ -169,6 +169,24 @@ test('lstrip', u' hello ', u'hello ')
test('rstrip', u' hello ', u' hello') test('rstrip', u' hello ', u' hello')
test('strip', u'hello', u'hello') test('strip', u'hello', u'hello')
# strip/lstrip/rstrip with None arg
test('strip', u' hello ', u'hello', None)
test('lstrip', u' hello ', u'hello ', None)
test('rstrip', u' hello ', u' hello', None)
test('strip', u'hello', u'hello', None)
# strip/lstrip/rstrip with unicode arg
test('strip', u'xyzzyhelloxyzzy', u'hello', u'xyz')
test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', u'xyz')
test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', u'xyz')
test('strip', u'hello', u'hello', u'xyz')
# strip/lstrip/rstrip with str arg
test('strip', u'xyzzyhelloxyzzy', u'hello', 'xyz')
test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', 'xyz')
test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', 'xyz')
test('strip', u'hello', u'hello', 'xyz')
test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS') test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')
if 0: if 0:
......
...@@ -1005,7 +1005,9 @@ static PyBufferProcs string_as_buffer = { ...@@ -1005,7 +1005,9 @@ static PyBufferProcs string_as_buffer = {
#define BOTHSTRIP 2 #define BOTHSTRIP 2
/* Arrays indexed by above */ /* Arrays indexed by above */
static const char *stripname[] = {"lstrip", "rstrip", "strip"}; static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
#define STRIPNAME(i) (stripformat[i]+3)
static PyObject * static PyObject *
...@@ -1449,15 +1451,26 @@ do_argstrip(PyStringObject *self, int striptype, PyObject *args) ...@@ -1449,15 +1451,26 @@ do_argstrip(PyStringObject *self, int striptype, PyObject *args)
{ {
PyObject *sep = NULL; PyObject *sep = NULL;
if (!PyArg_ParseTuple(args, "|O:[lr]strip", &sep)) if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
return NULL; return NULL;
if (sep != NULL && sep != Py_None) { if (sep != NULL && sep != Py_None) {
/* XXX What about Unicode? */ if (PyString_Check(sep))
if (!PyString_Check(sep)) { return do_xstrip(self, striptype, sep);
else if (PyUnicode_Check(sep)) {
PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
PyObject *res;
if (uniself==NULL)
return NULL;
res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
striptype, sep);
Py_DECREF(uniself);
return res;
}
else {
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"%s arg must be None or string", "%s arg must be None, str or unicode",
stripname[striptype]); STRIPNAME(striptype));
return NULL; return NULL;
} }
return do_xstrip(self, striptype, sep); return do_xstrip(self, striptype, sep);
...@@ -1468,11 +1481,12 @@ do_argstrip(PyStringObject *self, int striptype, PyObject *args) ...@@ -1468,11 +1481,12 @@ do_argstrip(PyStringObject *self, int striptype, PyObject *args)
static char strip__doc__[] = static char strip__doc__[] =
"S.strip([sep]) -> string\n\ "S.strip([sep]) -> string or unicode\n\
\n\ \n\
Return a copy of the string S with leading and trailing\n\ Return a copy of the string S with leading and trailing\n\
whitespace removed.\n\ whitespace removed.\n\
If sep is given and not None, remove characters in sep instead."; If sep is given and not None, remove characters in sep instead.\n\
If sep is unicode, S will be converted to unicode before stripping";
static PyObject * static PyObject *
string_strip(PyStringObject *self, PyObject *args) string_strip(PyStringObject *self, PyObject *args)
...@@ -1485,10 +1499,11 @@ string_strip(PyStringObject *self, PyObject *args) ...@@ -1485,10 +1499,11 @@ string_strip(PyStringObject *self, PyObject *args)
static char lstrip__doc__[] = static char lstrip__doc__[] =
"S.lstrip([sep]) -> string\n\ "S.lstrip([sep]) -> string or unicode\n\
\n\ \n\
Return a copy of the string S with leading whitespace removed.\n\ Return a copy of the string S with leading whitespace removed.\n\
If sep is given and not None, remove characters in sep instead."; If sep is given and not None, remove characters in sep instead.\n\
If sep is unicode, S will be converted to unicode before stripping";
static PyObject * static PyObject *
string_lstrip(PyStringObject *self, PyObject *args) string_lstrip(PyStringObject *self, PyObject *args)
...@@ -1501,10 +1516,11 @@ string_lstrip(PyStringObject *self, PyObject *args) ...@@ -1501,10 +1516,11 @@ string_lstrip(PyStringObject *self, PyObject *args)
static char rstrip__doc__[] = static char rstrip__doc__[] =
"S.rstrip([sep]) -> string\n\ "S.rstrip([sep]) -> string or unicode\n\
\n\ \n\
Return a copy of the string S with trailing whitespace removed.\n\ Return a copy of the string S with trailing whitespace removed.\n\
If sep is given and not None, remove characters in sep instead."; If sep is given and not None, remove characters in sep instead.\n\
If sep is unicode, S will be converted to unicode before stripping";
static PyObject * static PyObject *
string_rstrip(PyStringObject *self, PyObject *args) string_rstrip(PyStringObject *self, PyObject *args)
......
...@@ -3503,35 +3503,6 @@ PyObject *split(PyUnicodeObject *self, ...@@ -3503,35 +3503,6 @@ PyObject *split(PyUnicodeObject *self,
return split_substring(self,list,substring,maxcount); return split_substring(self,list,substring,maxcount);
} }
/* Return a copy of self with whitespace removed from the left end when
   `left` is non-zero and from the right end when `right` is non-zero.
   If nothing is removed and self is exactly a unicode object, self is
   returned with an additional reference instead of a copy. */
static
PyObject *strip(PyUnicodeObject *self,
                int left,
                int right)
{
    Py_UNICODE *text = self->str;
    int first = 0;
    int stop = self->length;

    if (left) {
        /* advance past leading whitespace */
        for (; first < stop; first++) {
            if (!Py_UNICODE_ISSPACE(text[first]))
                break;
        }
    }
    if (right) {
        /* retreat over trailing whitespace, never crossing `first` */
        for (; stop > first; stop--) {
            if (!Py_UNICODE_ISSPACE(text[stop-1]))
                break;
        }
    }

    if (first != 0 || stop != self->length || !PyUnicode_CheckExact(self)) {
        return (PyObject*) PyUnicode_FromUnicode(
            self->str + first,
            stop - first
            );
    }

    /* nothing was stripped: hand back the original object */
    Py_INCREF(self);
    return (PyObject*) self;
}
static static
PyObject *replace(PyUnicodeObject *self, PyObject *replace(PyUnicodeObject *self,
PyUnicodeObject *str1, PyUnicodeObject *str1,
...@@ -4464,17 +4435,173 @@ unicode_lower(PyUnicodeObject *self) ...@@ -4464,17 +4435,173 @@ unicode_lower(PyUnicodeObject *self)
return fixup(self, fixlower); return fixup(self, fixlower);
} }
/* Selectors for which end(s) of the string to strip.  The values are also
   used as indices into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
/* Arrays indexed by above */
/* Format strings handed to PyArg_ParseTuple() by do_argstrip(): "|O" is one
   optional object argument, followed by ':' and the method name. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
/* Bare method name for error messages: skips the 3-character "|O:" prefix
   of the corresponding format string. */
#define STRIPNAME(i) (stripformat[i]+3)
/* Search the first n elements of s for the code unit c.
   Returns a pointer to the first matching element, or NULL when c does not
   occur (including when n is 0).
   The index is a size_t so that it has the same type as the bound n; the
   previous `int` index caused a signed/unsigned comparison and could
   overflow for n larger than INT_MAX. */
static const Py_UNICODE *
unicode_memchr(const Py_UNICODE *s, Py_UNICODE c, size_t n)
{
    size_t i;

    for (i = 0; i < n; ++i) {
        if (s[i] == c)
            return s + i;
    }
    return NULL;
}
/* externally visible for str.strip(unicode) */
/* Remove from self every leading and/or trailing character that occurs in
   sepobj.  striptype is LEFTSTRIP, RIGHTSTRIP or BOTHSTRIP.  sepobj is
   accessed through the unicode macros without a type check.  When nothing
   is removed and self is exactly a unicode object, self is returned with a
   new reference; otherwise a new unicode object holding the kept slice is
   returned. */
PyObject *
_PyUnicode_XStrip(PyUnicodeObject *self, int striptype, PyObject *sepobj)
{
    Py_UNICODE *text = PyUnicode_AS_UNICODE(self);
    int size = PyUnicode_GET_SIZE(self);
    Py_UNICODE *chars = PyUnicode_AS_UNICODE(sepobj);
    int nchars = PyUnicode_GET_SIZE(sepobj);
    int lo = 0;
    int hi = size;

    if (striptype != RIGHTSTRIP) {
        /* skip leading characters that are members of the strip set */
        for (; lo < size; lo++) {
            if (!unicode_memchr(chars, text[lo], nchars))
                break;
        }
    }

    if (striptype != LEFTSTRIP) {
        /* drop trailing members of the strip set, never moving below lo */
        while (hi > lo && unicode_memchr(chars, text[hi-1], nchars))
            hi--;
    }

    if (lo != 0 || hi != size || !PyUnicode_CheckExact(self))
        return PyUnicode_FromUnicode(text+lo, hi-lo);

    /* unchanged exact unicode object: share it instead of copying */
    Py_INCREF(self);
    return (PyObject*)self;
}
/* Whitespace-stripping worker shared by strip(), lstrip() and rstrip()
   when no character set is supplied.  striptype is LEFTSTRIP, RIGHTSTRIP
   or BOTHSTRIP.  Returns self (with a new reference) when nothing is
   removed and self is exactly a unicode object; otherwise a new unicode
   object holding the kept slice. */
static PyObject *
do_strip(PyUnicodeObject *self, int striptype)
{
    Py_UNICODE *text = PyUnicode_AS_UNICODE(self);
    int size = PyUnicode_GET_SIZE(self);
    int lo = 0;
    int hi = size;

    if (striptype != RIGHTSTRIP) {
        /* skip leading whitespace */
        for (; lo < size; lo++) {
            if (!Py_UNICODE_ISSPACE(text[lo]))
                break;
        }
    }

    if (striptype != LEFTSTRIP) {
        /* drop trailing whitespace, never moving below lo */
        while (hi > lo && Py_UNICODE_ISSPACE(text[hi-1]))
            hi--;
    }

    if (lo != 0 || hi != size || !PyUnicode_CheckExact(self))
        return PyUnicode_FromUnicode(text+lo, hi-lo);

    /* unchanged exact unicode object: share it instead of copying */
    Py_INCREF(self);
    return (PyObject*)self;
}
/* Argument-parsing front end for the strip methods.  Accepts one optional
   argument: missing or None strips whitespace, a unicode object is used as
   the set of characters to strip, and a str is first converted to unicode.
   Any other type raises TypeError.  Returns NULL with an exception set on
   failure. */
static PyObject *
do_argstrip(PyUnicodeObject *self, int striptype, PyObject *args)
{
    PyObject *sep = NULL;
    PyObject *usep;
    PyObject *result;

    if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
        return NULL;

    /* no argument, or an explicit None: plain whitespace stripping */
    if (sep == NULL || sep == Py_None)
        return do_strip(self, striptype);

    if (PyUnicode_Check(sep))
        return _PyUnicode_XStrip(self, striptype, sep);

    if (!PyString_Check(sep)) {
        PyErr_Format(PyExc_TypeError,
                     "%s arg must be None, unicode or str",
                     STRIPNAME(striptype));
        return NULL;
    }

    /* str argument: convert to a temporary unicode object and release it
       once the strip is done */
    usep = PyUnicode_FromObject(sep);
    if (usep == NULL)
        return NULL;
    result = _PyUnicode_XStrip(self, striptype, usep);
    Py_DECREF(usep);
    return result;
}
/* Docstring for the unicode "strip" method (see the "strip" entry in
   unicode_methods). */
static char strip__doc__[] =
"S.strip([sep]) -> unicode\n\
\n\
Return a copy of the string S with leading and trailing\n\
whitespace removed.\n\
If sep is given and not None, remove characters in sep instead.\n\
If sep is a str, it will be converted to unicode before stripping";
/* unicode.strip([sep]): strip both ends.  An empty argument tuple goes
   straight to the whitespace worker; anything else is parsed and
   dispatched by do_argstrip(). */
static PyObject *
unicode_strip(PyUnicodeObject *self, PyObject *args)
{
    if (PyTuple_GET_SIZE(args) != 0)
        return do_argstrip(self, BOTHSTRIP, args);

    /* common case: no arguments at all */
    return do_strip(self, BOTHSTRIP);
}
static char lstrip__doc__[] = static char lstrip__doc__[] =
"S.lstrip() -> unicode\n\ "S.lstrip([sep]) -> unicode\n\
\n\ \n\
Return a copy of the string S with leading whitespace removed."; Return a copy of the string S with leading whitespace removed.\n\
If sep is given and not None, remove characters in sep instead.\n\
If sep is a str, it will be converted to unicode before stripping";
static PyObject * static PyObject *
unicode_lstrip(PyUnicodeObject *self) unicode_lstrip(PyUnicodeObject *self, PyObject *args)
{ {
return strip(self, 1, 0); if (PyTuple_GET_SIZE(args) == 0)
return do_strip(self, LEFTSTRIP); /* Common case */
else
return do_argstrip(self, LEFTSTRIP, args);
} }
/* Docstring for the unicode "rstrip" method (see the "rstrip" entry in
   unicode_methods). */
static char rstrip__doc__[] =
"S.rstrip([sep]) -> unicode\n\
\n\
Return a copy of the string S with trailing whitespace removed.\n\
If sep is given and not None, remove characters in sep instead.\n\
If sep is a str, it will be converted to unicode before stripping";
/* unicode.rstrip([sep]): strip the right end only.  An empty argument
   tuple goes straight to the whitespace worker; anything else is parsed
   and dispatched by do_argstrip(). */
static PyObject *
unicode_rstrip(PyUnicodeObject *self, PyObject *args)
{
    if (PyTuple_GET_SIZE(args) != 0)
        return do_argstrip(self, RIGHTSTRIP, args);

    /* common case: no arguments at all */
    return do_strip(self, RIGHTSTRIP);
}
static PyObject* static PyObject*
unicode_repeat(PyUnicodeObject *str, int len) unicode_repeat(PyUnicodeObject *str, int len)
{ {
...@@ -4677,17 +4804,6 @@ unicode_rjust(PyUnicodeObject *self, PyObject *args) ...@@ -4677,17 +4804,6 @@ unicode_rjust(PyUnicodeObject *self, PyObject *args)
return (PyObject*) pad(self, width - self->length, 0, ' '); return (PyObject*) pad(self, width - self->length, 0, ' ');
} }
static char rstrip__doc__[] =
"S.rstrip() -> unicode\n\
\n\
Return a copy of the string S with trailing whitespace removed.";
/* unicode.rstrip(): remove trailing whitespace only, by calling strip()
   with the left flag cleared and the right flag set. */
static PyObject *
unicode_rstrip(PyUnicodeObject *self)
{
    const int strip_left = 0;
    const int strip_right = 1;

    return strip(self, strip_left, strip_right);
}
static PyObject* static PyObject*
unicode_slice(PyUnicodeObject *self, int start, int end) unicode_slice(PyUnicodeObject *self, int start, int end)
{ {
...@@ -4783,17 +4899,6 @@ PyObject *unicode_str(PyUnicodeObject *self) ...@@ -4783,17 +4899,6 @@ PyObject *unicode_str(PyUnicodeObject *self)
return PyUnicode_AsEncodedString((PyObject *)self, NULL, NULL); return PyUnicode_AsEncodedString((PyObject *)self, NULL, NULL);
} }
static char strip__doc__[] =
"S.strip() -> unicode\n\
\n\
Return a copy of S with leading and trailing whitespace removed.";
/* unicode.strip(): remove whitespace from both ends, by calling strip()
   with both the left and right flags set. */
static PyObject *
unicode_strip(PyUnicodeObject *self)
{
    const int strip_left = 1;
    const int strip_right = 1;

    return strip(self, strip_left, strip_right);
}
static char swapcase__doc__[] = static char swapcase__doc__[] =
"S.swapcase() -> unicode\n\ "S.swapcase() -> unicode\n\
\n\ \n\
...@@ -4966,14 +5071,14 @@ static PyMethodDef unicode_methods[] = { ...@@ -4966,14 +5071,14 @@ static PyMethodDef unicode_methods[] = {
{"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__}, {"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__},
{"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__}, {"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__},
{"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__}, {"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__},
{"lstrip", (PyCFunction) unicode_lstrip, METH_NOARGS, lstrip__doc__}, {"lstrip", (PyCFunction) unicode_lstrip, METH_VARARGS, lstrip__doc__},
/* {"maketrans", (PyCFunction) unicode_maketrans, METH_VARARGS, maketrans__doc__}, */ /* {"maketrans", (PyCFunction) unicode_maketrans, METH_VARARGS, maketrans__doc__}, */
{"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__}, {"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__},
{"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__}, {"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__},
{"rjust", (PyCFunction) unicode_rjust, METH_VARARGS, rjust__doc__}, {"rjust", (PyCFunction) unicode_rjust, METH_VARARGS, rjust__doc__},
{"rstrip", (PyCFunction) unicode_rstrip, METH_NOARGS, rstrip__doc__}, {"rstrip", (PyCFunction) unicode_rstrip, METH_VARARGS, rstrip__doc__},
{"splitlines", (PyCFunction) unicode_splitlines, METH_VARARGS, splitlines__doc__}, {"splitlines", (PyCFunction) unicode_splitlines, METH_VARARGS, splitlines__doc__},
{"strip", (PyCFunction) unicode_strip, METH_NOARGS, strip__doc__}, {"strip", (PyCFunction) unicode_strip, METH_VARARGS, strip__doc__},
{"swapcase", (PyCFunction) unicode_swapcase, METH_NOARGS, swapcase__doc__}, {"swapcase", (PyCFunction) unicode_swapcase, METH_NOARGS, swapcase__doc__},
{"translate", (PyCFunction) unicode_translate, METH_O, translate__doc__}, {"translate", (PyCFunction) unicode_translate, METH_O, translate__doc__},
{"upper", (PyCFunction) unicode_upper, METH_NOARGS, upper__doc__}, {"upper", (PyCFunction) unicode_upper, METH_NOARGS, upper__doc__},
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment