Kaydet (Commit) ceee0773 authored tarafından Georg Brandl's avatar Georg Brandl

#1496: revert str.translate() to the old version, and add

str.maketrans() to make a table in a more comfortable way.
üst 45f9af34
......@@ -800,6 +800,21 @@ functions based on regular expressions.
'example.com'
.. method:: str.maketrans(x[, y[, z]])
This static method returns a translation table usable for :meth:`str.translate`.
If there is only one argument, it must be a dictionary mapping Unicode
ordinals (integers) or characters (strings of length 1) to Unicode ordinals,
strings (of arbitrary lengths) or None. Character keys will then be
converted to ordinals.
If there are two arguments, they must be strings of equal length, and in the
resulting dictionary, each character in x will be mapped to the character at
the same position in y. If there is a third argument, it must be a string,
whose characters will be mapped to None in the result.
.. method:: str.partition(sep)
Split the string at the first occurrence of *sep*, and return a 3-tuple
......@@ -934,15 +949,17 @@ functions based on regular expressions.
.. method:: str.translate(map)
Return a copy of the string where all characters have been mapped through the
*map* which must be a dictionary of characters (strings of length 1) or
Unicode ordinals (integers) to Unicode ordinals, strings or ``None``.
Unmapped characters are left untouched. Characters mapped to ``None`` are
deleted.
*map* which must be a dictionary of Unicode ordinals (integers) to Unicode
ordinals, strings or ``None``. Unmapped characters are left untouched.
Characters mapped to ``None`` are deleted.
A *map* for :meth:`translate` is usually best created by
:meth:`str.maketrans`.
.. note::
A more flexible approach is to create a custom character mapping codec
using the :mod:`codecs` module (see :mod:`encodings.cp1251` for an
An even more flexible approach is to create a custom character mapping
codec using the :mod:`codecs` module (see :mod:`encodings.cp1251` for an
example).
......
......@@ -166,18 +166,37 @@ class UnicodeTest(
self.assertRaises(ValueError, 'abcdefghi'.rindex, 'ghi', 0, 8)
self.assertRaises(ValueError, 'abcdefghi'.rindex, 'ghi', 0, -1)
def test_translate(self):
self.checkequalnofix('bbbc', 'abababc', 'translate', {ord('a'):None})
self.checkequalnofix('iiic', 'abababc', 'translate', {ord('a'):None, ord('b'):ord('i')})
self.checkequalnofix('iiix', 'abababc', 'translate', {ord('a'):None, ord('b'):ord('i'), ord('c'):'x'})
self.checkequalnofix('<i><i><i>c', 'abababc', 'translate', {'a':None, 'b':'<i>'})
self.checkequalnofix('c', 'abababc', 'translate', {ord('a'):None, ord('b'):''})
self.checkequalnofix('xyyx', 'xzx', 'translate', {ord('z'):'yy'})
def test_maketrans_translate(self):
# these work with plain translate()
self.checkequalnofix('bbbc', 'abababc', 'translate',
{ord('a'): None})
self.checkequalnofix('iiic', 'abababc', 'translate',
{ord('a'): None, ord('b'): ord('i')})
self.checkequalnofix('iiix', 'abababc', 'translate',
{ord('a'): None, ord('b'): ord('i'), ord('c'): 'x'})
self.checkequalnofix('c', 'abababc', 'translate',
{ord('a'): None, ord('b'): ''})
self.checkequalnofix('xyyx', 'xzx', 'translate',
{ord('z'): 'yy'})
# this needs maketrans()
self.checkequalnofix('abababc', 'abababc', 'translate',
{'b': '<i>'})
tbl = self.type2test.maketrans({'a': None, 'b': '<i>'})
self.checkequalnofix('<i><i><i>c', 'abababc', 'translate', tbl)
# test alternative way of calling maketrans()
tbl = self.type2test.maketrans('abc', 'xyz', 'd')
self.checkequalnofix('xyzzy', 'abdcdcbdddd', 'translate', tbl)
self.assertRaises(TypeError, self.type2test.maketrans)
self.assertRaises(ValueError, self.type2test.maketrans, 'abc', 'defg')
self.assertRaises(TypeError, self.type2test.maketrans, 2, 'def')
self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 2)
self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 'def', 2)
self.assertRaises(ValueError, self.type2test.maketrans, {'xy': 2})
self.assertRaises(TypeError, self.type2test.maketrans, {(1,): 2})
self.assertRaises(TypeError, 'hello'.translate)
self.assertRaises(TypeError, 'abababc'.translate, 'abc', 'xyz')
self.assertRaises(ValueError, 'abababc'.translate, {'xy':2})
self.assertRaises(TypeError, 'abababc'.translate, {(1,):2})
def test_split(self):
string_tests.CommonTest.test_split(self)
......
......@@ -7793,68 +7793,124 @@ unicode_swapcase(PyUnicodeObject *self)
return fixup(self, fixswapcase);
}
PyDoc_STRVAR(translate__doc__,
"S.translate(table) -> unicode\n\
PyDoc_STRVAR(maketrans__doc__,
"str.maketrans(x[, y[, z]]) -> dict (static method)\n\
\n\
Return a copy of the string S, where all characters have been mapped\n\
through the given translation table, which must be a mapping of\n\
Unicode ordinals to Unicode ordinals, Unicode strings or None.\n\
Unmapped characters are left untouched. Characters mapped to None\n\
are deleted.");
Return a translation table usable for str.translate().\n\
If there is only one argument, it must be a dictionary mapping Unicode\n\
ordinals (integers) or characters to Unicode ordinals, strings or None.\n\
Character keys will then be converted to ordinals.\n\
If there are two arguments, they must be strings of equal length, and\n\
in the resulting dictionary, each character in x will be mapped to the\n\
character at the same position in y. If there is a third argument, it\n\
must be a string, whose characters will be mapped to None in the result.");
static PyObject*
unicode_translate(PyUnicodeObject *self, PyObject *table)
unicode_maketrans(PyUnicodeObject *null, PyObject *args)
{
PyObject *newtable = NULL;
PyObject *x, *y = NULL, *z = NULL;
PyObject *new = NULL, *key, *value;
Py_ssize_t i = 0;
PyObject *key, *value, *result;
if (!PyDict_Check(table)) {
PyErr_SetString(PyExc_TypeError, "translate argument must be a dict");
int res;
if (!PyArg_ParseTuple(args, "O|UU:maketrans", &x, &y, &z))
return NULL;
}
/* fixup the table -- allow size-1 string keys instead of only int keys */
newtable = PyDict_Copy(table);
if (!newtable) return NULL;
while (PyDict_Next(table, &i, &key, &value)) {
if (PyUnicode_Check(key)) {
/* convert string keys to integer keys */
PyObject *newkey;
int res;
if (PyUnicode_GET_SIZE(key) != 1) {
PyErr_SetString(PyExc_ValueError, "string items in translate "
"table must be 1 element long");
goto err;
}
newkey = PyInt_FromLong(PyUnicode_AS_UNICODE(key)[0]);
if (!newkey)
new = PyDict_New();
if (!new)
return NULL;
if (y != NULL) {
/* x must be a string too, of equal length */
Py_ssize_t ylen = PyUnicode_GET_SIZE(y);
if (!PyUnicode_Check(x)) {
PyErr_SetString(PyExc_TypeError, "first maketrans argument must "
"be a string if there is a second argument");
goto err;
}
if (PyUnicode_GET_SIZE(x) != ylen) {
PyErr_SetString(PyExc_ValueError, "the first two maketrans "
"arguments must have equal length");
goto err;
}
/* create entries for translating chars in x to those in y */
for (i = 0; i < PyUnicode_GET_SIZE(x); i++) {
key = PyInt_FromLong(PyUnicode_AS_UNICODE(x)[i]);
value = PyInt_FromLong(PyUnicode_AS_UNICODE(y)[i]);
if (!key || !value)
goto err;
res = PyDict_SetItem(newtable, newkey, value);
Py_DECREF(newkey);
res = PyDict_SetItem(new, key, value);
Py_DECREF(key);
Py_DECREF(value);
if (res < 0)
goto err;
} else if (PyInt_Check(key)) {
/* just keep integer keys */
if (PyDict_SetItem(newtable, key, value) < 0)
goto err;
} else {
PyErr_SetString(PyExc_TypeError, "items in translate table must be "
"strings or integers");
}
/* create entries for deleting chars in z */
if (z != NULL) {
for (i = 0; i < PyUnicode_GET_SIZE(z); i++) {
key = PyInt_FromLong(PyUnicode_AS_UNICODE(z)[i]);
if (!key)
goto err;
res = PyDict_SetItem(new, key, Py_None);
Py_DECREF(key);
if (res < 0)
goto err;
}
}
} else {
/* x must be a dict */
if (!PyDict_Check(x)) {
PyErr_SetString(PyExc_TypeError, "if you give only one argument "
"to maketrans it must be a dict");
goto err;
}
/* copy entries into the new dict, converting string keys to int keys */
while (PyDict_Next(x, &i, &key, &value)) {
if (PyUnicode_Check(key)) {
/* convert string keys to integer keys */
PyObject *newkey;
if (PyUnicode_GET_SIZE(key) != 1) {
PyErr_SetString(PyExc_ValueError, "string keys in translate "
"table must be of length 1");
goto err;
}
newkey = PyInt_FromLong(PyUnicode_AS_UNICODE(key)[0]);
if (!newkey)
goto err;
res = PyDict_SetItem(new, newkey, value);
Py_DECREF(newkey);
if (res < 0)
goto err;
} else if (PyInt_Check(key)) {
/* just keep integer keys */
if (PyDict_SetItem(new, key, value) < 0)
goto err;
} else {
PyErr_SetString(PyExc_TypeError, "keys in translate table must "
"be strings or integers");
goto err;
}
}
}
result = PyUnicode_TranslateCharmap(self->str,
self->length,
newtable,
"ignore");
Py_DECREF(newtable);
return result;
return new;
err:
Py_DECREF(newtable);
Py_DECREF(new);
return NULL;
}
/* Docstring text for S.translate(table), defined under the name
   translate__doc__.  It describes the table as a mapping keyed by Unicode
   ordinals; values may be ordinals, strings or None (None deletes). */
PyDoc_STRVAR(translate__doc__,
"S.translate(table) -> unicode\n\
\n\
Return a copy of the string S, where all characters have been mapped\n\
through the given translation table, which must be a mapping of\n\
Unicode ordinals to Unicode ordinals, Unicode strings or None.\n\
Unmapped characters are left untouched. Characters mapped to None\n\
are deleted.");
/* Implementation of S.translate(table).
 *
 * Hands the string's character buffer and length, together with the
 * caller-supplied table, directly to PyUnicode_TranslateCharmap() using
 * "ignore" as the final (error handling) argument.  The table is passed
 * through unchanged -- no key conversion is done here.
 *
 * Returns whatever PyUnicode_TranslateCharmap() returns.
 */
static PyObject*
unicode_translate(PyUnicodeObject *self, PyObject *table)
{
    PyObject *translated;

    translated = PyUnicode_TranslateCharmap(self->str,
                                            self->length,
                                            table,
                                            "ignore");
    return translated;
}
PyDoc_STRVAR(upper__doc__,
"S.upper() -> unicode\n\
\n\
......@@ -8076,6 +8132,8 @@ static PyMethodDef unicode_methods[] = {
{"__format__", (PyCFunction) unicode_unicode__format__, METH_VARARGS, p_format__doc__},
{"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
{"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
{"maketrans", (PyCFunction) unicode_maketrans,
METH_VARARGS | METH_STATIC, maketrans__doc__},
#if 0
{"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},
#endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment