#1496: revert str.translate() to the old version, and add

str.maketrans() to make a table in a more comfortable way.

#1496: revert str.translate() to the old version, and add
str.maketrans() to make a table in a more comfortable way.
ceee0773 · Georg Brandl · 45f9af34 · ceee0773 · ceee0773 · ceee0773
Commit ceee0773 authored Nov 27, 2007 by Georg Brandl
Hide whitespace changes
Inline Side-by-side

Showing with 155 additions and 61 deletions

stdtypes.rst Doc/library/stdtypes.rst +23 -6

test_unicode.py Lib/test/test_unicode.py +28 -9

unicodeobject.c Objects/unicodeobject.c +104 -46

No files found.
--- a/Doc/library/stdtypes.rst
+++ b/Doc/library/stdtypes.rst
@@ -800,6 +800,21 @@ functions based on regular expressions.
      'example.com'


+.. method:: str.maketrans(x[, y[, z]])
+
+   This static method returns a translation table usable for :meth:`str.translate`.
+
+   If there is only one argument, it must be a dictionary mapping Unicode
+   ordinals (integers) or characters (strings of length 1) to Unicode ordinals,
+   strings (of arbitrary lengths) or None.  Character keys will then be
+   converted to ordinals.
+
+   If there are two arguments, they must be strings of equal length, and in the
+   resulting dictionary, each character in x will be mapped to the character at
+   the same position in y.  If there is a third argument, it must be a string,
+   whose characters will be mapped to None in the result.
+
+
 .. method:: str.partition(sep)

   Split the string at the first occurrence of *sep*, and return a 3-tuple
@@ -934,15 +949,17 @@ functions based on regular expressions.
 .. method:: str.translate(map)

   Return a copy of the *s* where all characters have been mapped through the
-   *map* which must be a dictionary of characters (strings of length 1) or
-   Unicode ordinals (integers) to Unicode ordinals, strings or ``None``.
-   Unmapped characters are left untouched.  Characters mapped to ``None`` are
-   deleted.
+   *map* which must be a dictionary of Unicode ordinals (integers) to Unicode
+   ordinals, strings or ``None``.  Unmapped characters are left untouched.
+   Characters mapped to ``None`` are deleted.
+
+   A *map* for :meth:`translate` is usually best created by
+   :meth:`str.maketrans`.

   .. note::

-      A more flexible approach is to create a custom character mapping codec
-      using the :mod:`codecs` module (see :mod:`encodings.cp1251` for an
+      An even more flexible approach is to create a custom character mapping
+      codec using the :mod:`codecs` module (see :mod:`encodings.cp1251` for an
      example).



--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -166,18 +166,37 @@ class UnicodeTest(
        self.assertRaises(ValueError, 'abcdefghi'.rindex,  'ghi', 0, 8)
        self.assertRaises(ValueError, 'abcdefghi'.rindex,  'ghi', 0, -1)

-    def test_translate(self):
-        self.checkequalnofix('bbbc', 'abababc', 'translate', {ord('a'):None})
-        self.checkequalnofix('iiic', 'abababc', 'translate', {ord('a'):None, ord('b'):ord('i')})
-        self.checkequalnofix('iiix', 'abababc', 'translate', {ord('a'):None, ord('b'):ord('i'), ord('c'):'x'})
-        self.checkequalnofix('<i><i><i>c', 'abababc', 'translate', {'a':None, 'b':'<i>'})
-        self.checkequalnofix('c', 'abababc', 'translate', {ord('a'):None, ord('b'):''})
-        self.checkequalnofix('xyyx', 'xzx', 'translate', {ord('z'):'yy'})
+    def test_maketrans_translate(self):
+        # these work with plain translate()
+        self.checkequalnofix('bbbc', 'abababc', 'translate',
+                             {ord('a'): None})
+        self.checkequalnofix('iiic', 'abababc', 'translate',
+                             {ord('a'): None, ord('b'): ord('i')})
+        self.checkequalnofix('iiix', 'abababc', 'translate',
+                             {ord('a'): None, ord('b'): ord('i'), ord('c'): 'x'})
+        self.checkequalnofix('c', 'abababc', 'translate',
+                             {ord('a'): None, ord('b'): ''})
+        self.checkequalnofix('xyyx', 'xzx', 'translate',
+                             {ord('z'): 'yy'})
+        # this needs maketrans()
+        self.checkequalnofix('abababc', 'abababc', 'translate',
+                             {'b': '<i>'})
+        tbl = self.type2test.maketrans({'a': None, 'b': '<i>'})
+        self.checkequalnofix('<i><i><i>c', 'abababc', 'translate', tbl)
+        # test alternative way of calling maketrans()
+        tbl = self.type2test.maketrans('abc', 'xyz', 'd')
+        self.checkequalnofix('xyzzy', 'abdcdcbdddd', 'translate', tbl)
+
+        self.assertRaises(TypeError, self.type2test.maketrans)
+        self.assertRaises(ValueError, self.type2test.maketrans, 'abc', 'defg')
+        self.assertRaises(TypeError, self.type2test.maketrans, 2, 'def')
+        self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 2)
+        self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 'def', 2)
+        self.assertRaises(ValueError, self.type2test.maketrans, {'xy': 2})
+        self.assertRaises(TypeError, self.type2test.maketrans, {(1,): 2})

        self.assertRaises(TypeError, 'hello'.translate)
        self.assertRaises(TypeError, 'abababc'.translate, 'abc', 'xyz')
-        self.assertRaises(ValueError, 'abababc'.translate, {'xy':2})
-        self.assertRaises(TypeError, 'abababc'.translate, {(1,):2})

    def test_split(self):
        string_tests.CommonTest.test_split(self)

--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -7793,68 +7793,124 @@ unicode_swapcase(PyUnicodeObject *self)
    return fixup(self, fixswapcase);
 }

-PyDoc_STRVAR(translate__doc__,
-"S.translate(table) -> unicode\n\
+PyDoc_STRVAR(maketrans__doc__,
+"str.maketrans(x[, y[, z]]) -> dict (static method)\n\
 \n\
-Return a copy of the string S, where all characters have been mapped\n\
-through the given translation table, which must be a mapping of\n\
-Unicode ordinals to Unicode ordinals, Unicode strings or None.\n\
-Unmapped characters are left untouched. Characters mapped to None\n\
-are deleted.");
+Return a translation table usable for str.translate().\n\
+If there is only one argument, it must be a dictionary mapping Unicode\n\
+ordinals (integers) or characters to Unicode ordinals, strings or None.\n\
+Character keys will then be converted to ordinals.\n\
+If there are two arguments, they must be strings of equal length, and\n\
+in the resulting dictionary, each character in x will be mapped to the\n\
+character at the same position in y. If there is a third argument, it\n\
+must be a string, whose characters will be mapped to None in the result.");

 static PyObject*
-unicode_translate(PyUnicodeObject *self, PyObject *table)
+unicode_maketrans(PyUnicodeObject *null, PyObject *args)
 {
-    PyObject *newtable = NULL;
+    PyObject *x, *y = NULL, *z = NULL;
+    PyObject *new = NULL, *key, *value;
    Py_ssize_t i = 0;
-    PyObject *key, *value, *result;
-
-    if (!PyDict_Check(table)) {
-        PyErr_SetString(PyExc_TypeError, "translate argument must be a dict");
+    int res;
+    
+    if (!PyArg_ParseTuple(args, "O|UU:maketrans", &x, &y, &z))
        return NULL;
-    }
-    /* fixup the table -- allow size-1 string keys instead of only int keys */
-    newtable = PyDict_Copy(table);
-    if (!newtable) return NULL;
-    while (PyDict_Next(table, &i, &key, &value)) {
-        if (PyUnicode_Check(key)) {
-            /* convert string keys to integer keys */
-            PyObject *newkey;
-            int res;
-            if (PyUnicode_GET_SIZE(key) != 1) {
-                PyErr_SetString(PyExc_ValueError, "string items in translate "
-                                "table must be 1 element long");
-                goto err;
-            }
-            newkey = PyInt_FromLong(PyUnicode_AS_UNICODE(key)[0]);
-            if (!newkey)
+    new = PyDict_New();
+    if (!new)
+        return NULL;
+    if (y != NULL) {
+        /* x must be a string too, of equal length */
+        Py_ssize_t ylen = PyUnicode_GET_SIZE(y);
+        if (!PyUnicode_Check(x)) {
+            PyErr_SetString(PyExc_TypeError, "first maketrans argument must "
+                            "be a string if there is a second argument");
+            goto err;
+        }
+        if (PyUnicode_GET_SIZE(x) != ylen) {
+            PyErr_SetString(PyExc_ValueError, "the first two maketrans "
+                            "arguments must have equal length");
+            goto err;
+        }
+        /* create entries for translating chars in x to those in y */
+        for (i = 0; i < PyUnicode_GET_SIZE(x); i++) {
+            key = PyInt_FromLong(PyUnicode_AS_UNICODE(x)[i]);
+            value = PyInt_FromLong(PyUnicode_AS_UNICODE(y)[i]);
+            if (!key || !value)
                goto err;
-            res = PyDict_SetItem(newtable, newkey, value);
-            Py_DECREF(newkey);
+            res = PyDict_SetItem(new, key, value);
+            Py_DECREF(key);
+            Py_DECREF(value);
            if (res < 0)
                goto err;
-        } else if (PyInt_Check(key)) {
-            /* just keep integer keys */
-            if (PyDict_SetItem(newtable, key, value) < 0)
-                goto err;
-        } else {
-            PyErr_SetString(PyExc_TypeError, "items in translate table must be "
-                            "strings or integers");
+        }
+        /* create entries for deleting chars in z */
+        if (z != NULL) {
+            for (i = 0; i < PyUnicode_GET_SIZE(z); i++) {
+                key = PyInt_FromLong(PyUnicode_AS_UNICODE(z)[i]);
+                if (!key)
+                    goto err;
+                res = PyDict_SetItem(new, key, Py_None);
+                Py_DECREF(key);
+                if (res < 0)
+                    goto err;
+            }
+        }
+    } else {
+        /* x must be a dict */
+        if (!PyDict_Check(x)) {
+            PyErr_SetString(PyExc_TypeError, "if you give only one argument "
+                            "to maketrans it must be a dict");
            goto err;
        }
+        /* copy entries into the new dict, converting string keys to int keys */
+        while (PyDict_Next(x, &i, &key, &value)) {
+            if (PyUnicode_Check(key)) {
+                /* convert string keys to integer keys */
+                PyObject *newkey;
+                if (PyUnicode_GET_SIZE(key) != 1) {
+                    PyErr_SetString(PyExc_ValueError, "string keys in translate "
+                                    "table must be of length 1");
+                    goto err;
+                }
+                newkey = PyInt_FromLong(PyUnicode_AS_UNICODE(key)[0]);
+                if (!newkey)
+                    goto err;
+                res = PyDict_SetItem(new, newkey, value);
+                Py_DECREF(newkey);
+                if (res < 0)
+                    goto err;
+            } else if (PyInt_Check(key)) {
+                /* just keep integer keys */
+                if (PyDict_SetItem(new, key, value) < 0)
+                    goto err;
+            } else {
+                PyErr_SetString(PyExc_TypeError, "keys in translate table must "
+                                "be strings or integers");
+                goto err;
+            }
+        }
    }
-
-    result = PyUnicode_TranslateCharmap(self->str,
-                                        self->length,
-                                        newtable,
-                                        "ignore");
-    Py_DECREF(newtable);
-    return result;
+    return new;
  err:
-    Py_DECREF(newtable);
+    Py_DECREF(new);
    return NULL;
 }

+PyDoc_STRVAR(translate__doc__,
+"S.translate(table) -> unicode\n\
+\n\
+Return a copy of the string S, where all characters have been mapped\n\
+through the given translation table, which must be a mapping of\n\
+Unicode ordinals to Unicode ordinals, Unicode strings or None.\n\
+Unmapped characters are left untouched. Characters mapped to None\n\
+are deleted.");
+
+static PyObject*
+unicode_translate(PyUnicodeObject *self, PyObject *table)
+{
+    return PyUnicode_TranslateCharmap(self->str, self->length, table, "ignore");
+}
+
 PyDoc_STRVAR(upper__doc__,
 "S.upper() -> unicode\n\
 \n\
@@ -8076,6 +8132,8 @@ static PyMethodDef unicode_methods[] = {
    {"__format__", (PyCFunction) unicode_unicode__format__, METH_VARARGS, p_format__doc__},
    {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
    {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
+    {"maketrans", (PyCFunction) unicode_maketrans,
+     METH_VARARGS | METH_STATIC, maketrans__doc__},
 #if 0
    {"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},
 #endif