Kaydet (Commit) 621ef3d8 authored tarafından Victor Stinner's avatar Victor Stinner

Issue #15609: Optimize str%args for integer argument

 - Use _PyLong_FormatWriter() instead of formatlong() when possible, to avoid
   a temporary buffer
 - Enable the fast path when the width is smaller than or equal to the length,
   and when the precision is greater than or equal to the length
 - Add unit tests!
 - formatlong() uses PyUnicode_Resize() instead of _PyUnicode_FromASCII()
   to resize the output string
üst fd0d3e5d
...@@ -307,6 +307,22 @@ class FormatTest(unittest.TestCase): ...@@ -307,6 +307,22 @@ class FormatTest(unittest.TestCase):
finally: finally:
locale.setlocale(locale.LC_ALL, oldloc) locale.setlocale(locale.LC_ALL, oldloc)
# Checks that formatting a str with a spec that cannot alter it yields the
# very same object (assertIs) rather than an equal copy, for both the
# % operator and str.format().
# NOTE(review): indentation was lost in this view; the method presumably
# belongs to the FormatTest class named in the hunk header -- confirm.
@support.cpython_only
def test_optimisations(self):
text = "abcde" # 5 characters
# %-formatting: no spec, precision == length, precision > length,
# width < length, width == length.
self.assertIs("%s" % text, text)
self.assertIs("%.5s" % text, text)
self.assertIs("%.10s" % text, text)
self.assertIs("%1s" % text, text)
self.assertIs("%5s" % text, text)
# str.format(): empty spec, explicit 's' type, then the same
# precision/width cases as above.
self.assertIs("{0}".format(text), text)
self.assertIs("{0:s}".format(text), text)
self.assertIs("{0:.5s}".format(text), text)
self.assertIs("{0:.10s}".format(text), text)
self.assertIs("{0:1s}".format(text), text)
self.assertIs("{0:5s}".format(text), text)
def test_main(): def test_main():
......
...@@ -13288,7 +13288,10 @@ formatlong(PyObject *val, int flags, int prec, int type) ...@@ -13288,7 +13288,10 @@ formatlong(PyObject *val, int flags, int prec, int type)
assert(PyLong_Check(val)); assert(PyLong_Check(val));
switch (type) { switch (type) {
default:
assert(!"'type' not in [diuoxX]");
case 'd': case 'd':
case 'i':
case 'u': case 'u':
/* Special-case boolean: we want 0/1 */ /* Special-case boolean: we want 0/1 */
if (PyBool_Check(val)) if (PyBool_Check(val))
...@@ -13305,8 +13308,6 @@ formatlong(PyObject *val, int flags, int prec, int type) ...@@ -13305,8 +13308,6 @@ formatlong(PyObject *val, int flags, int prec, int type)
numnondigits = 2; numnondigits = 2;
result = PyNumber_ToBase(val, 16); result = PyNumber_ToBase(val, 16);
break; break;
default:
assert(!"'type' not in [duoxX]");
} }
if (!result) if (!result)
return NULL; return NULL;
...@@ -13379,15 +13380,94 @@ formatlong(PyObject *val, int flags, int prec, int type) ...@@ -13379,15 +13380,94 @@ formatlong(PyObject *val, int flags, int prec, int type)
if (buf[i] >= 'a' && buf[i] <= 'x') if (buf[i] >= 'a' && buf[i] <= 'x')
buf[i] -= 'a'-'A'; buf[i] -= 'a'-'A';
} }
if (!PyUnicode_Check(result) || len != PyUnicode_GET_LENGTH(result)) { if (!PyUnicode_Check(result)
|| buf != PyUnicode_DATA(result)) {
PyObject *unicode; PyObject *unicode;
unicode = _PyUnicode_FromASCII(buf, len); unicode = _PyUnicode_FromASCII(buf, len);
Py_DECREF(result); Py_DECREF(result);
result = unicode; result = unicode;
} }
else if (len != PyUnicode_GET_LENGTH(result)) {
if (PyUnicode_Resize(&result, len) < 0)
Py_CLEAR(result);
}
return result; return result;
} }
/* Format the integer argument of a %d, %i, %u, %o, %x or %X conversion.
 *
 * writer:   output writer, written to only on the fast path.
 * v:        the argument being formatted; it is converted with
 *           PyNumber_Long() when it is not already an int.
 * c:        the conversion character.
 * width, prec, flags: parsed format modifiers (-1 means "not given" for
 *           width and prec).
 * p_result: receives the string returned by formatlong() on the slow path;
 *           left untouched otherwise.
 *
 * Return 1 if the number has been formatted into the writer,
 * 0 if the number has been formatted into *p_result,
 * -1 and raise an exception on error.
 *
 * A TypeError ("%c format: a number is required, not <type>") is raised when
 * v is not a number; it also replaces any TypeError set by PyNumber_Long(). */
static int
mainformatlong(_PyUnicodeWriter *writer, PyObject *v,
               int c, Py_ssize_t width, int prec, int flags,
               PyObject **p_result)
{
    PyObject *iobj, *res;

    if (!PyNumber_Check(v))
        goto wrongtype;

    if (!PyLong_Check(v)) {
        iobj = PyNumber_Long(v);
        if (iobj == NULL) {
            if (PyErr_ExceptionMatches(PyExc_TypeError))
                goto wrongtype;
            return -1;
        }
        assert(PyLong_Check(iobj));
    }
    else {
        iobj = v;
        Py_INCREF(iobj);
    }
    /* From here on we own one reference to iobj; every exit below must
       release it. */

    if (PyLong_CheckExact(v)
        && width == -1 && prec == -1
        && !(flags & (F_SIGN | F_BLANK))
        && c != 'X')
    {
        /* Fast path: write the digits straight into the writer, without
           going through the temporary string built by formatlong().
           'X' is excluded by the condition above, so it needs no case
           label here. */
        int alternate = flags & F_ALT;
        int base;
        int status;

        switch (c)
        {
            default:
                assert(0 && "'type' not in [diuoxX]");
                /* fall through */
            case 'd':
            case 'i':
            case 'u':
                base = 10;
                break;
            case 'o':
                base = 8;
                break;
            case 'x':
                base = 16;
                break;
        }

        status = _PyLong_FormatWriter(writer, v, base, alternate);
        /* Release our reference on success and on failure alike; it was
           previously leaked on both fast-path exits. */
        Py_DECREF(iobj);
        if (status == -1)
            return -1;
        return 1;
    }

    res = formatlong(iobj, flags, prec, c);
    Py_DECREF(iobj);
    if (res == NULL)
        return -1;
    *p_result = res;
    return 0;

wrongtype:
    PyErr_Format(PyExc_TypeError,
                 "%%%c format: a number is required, "
                 "not %.200s", (char)c, Py_TYPE(v)->tp_name);
    return -1;
}
static Py_UCS4 static Py_UCS4
formatchar(PyObject *v) formatchar(PyObject *v)
{ {
...@@ -13493,7 +13573,6 @@ PyUnicode_Format(PyObject *format, PyObject *args) ...@@ -13493,7 +13573,6 @@ PyUnicode_Format(PyObject *format, PyObject *args)
Py_UCS4 fill; Py_UCS4 fill;
int sign; int sign;
Py_UCS4 signchar; Py_UCS4 signchar;
int isnumok;
PyObject *v = NULL; PyObject *v = NULL;
void *pbuf = NULL; void *pbuf = NULL;
Py_ssize_t pindex, len; Py_ssize_t pindex, len;
...@@ -13692,64 +13771,18 @@ PyUnicode_Format(PyObject *format, PyObject *args) ...@@ -13692,64 +13771,18 @@ PyUnicode_Format(PyObject *format, PyObject *args)
case 'o': case 'o':
case 'x': case 'x':
case 'X': case 'X':
if (PyLong_CheckExact(v) {
&& width == -1 && prec == -1 int ret = mainformatlong(&writer, v, c, width, prec,
&& !(flags & (F_SIGN | F_BLANK))) flags, &temp);
{ if (ret == 1)
/* Fast path */ goto nextarg;
switch(c) if (ret == -1)
{
case 'd':
case 'i':
case 'u':
if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1)
goto onError;
goto nextarg;
case 'x':
if (_PyLong_FormatWriter(&writer, v, 16, flags & F_ALT) == -1)
goto onError;
goto nextarg;
case 'o':
if (_PyLong_FormatWriter(&writer, v, 8, flags & F_ALT) == -1)
goto onError;
goto nextarg;
default:
break;
}
}
isnumok = 0;
if (PyNumber_Check(v)) {
PyObject *iobj=NULL;
if (PyLong_Check(v)) {
iobj = v;
Py_INCREF(iobj);
}
else {
iobj = PyNumber_Long(v);
}
if (iobj!=NULL) {
if (PyLong_Check(iobj)) {
isnumok = 1;
sign = 1;
temp = formatlong(iobj, flags, prec, (c == 'i'? 'd': c));
Py_DECREF(iobj);
}
else {
Py_DECREF(iobj);
}
}
}
if (!isnumok) {
PyErr_Format(PyExc_TypeError,
"%%%c format: a number is required, "
"not %.200s", (char)c, Py_TYPE(v)->tp_name);
goto onError; goto onError;
} sign = 1;
if (flags & F_ZERO) if (flags & F_ZERO)
fill = '0'; fill = '0';
break; break;
}
case 'e': case 'e':
case 'E': case 'E':
...@@ -13803,7 +13836,14 @@ PyUnicode_Format(PyObject *format, PyObject *args) ...@@ -13803,7 +13836,14 @@ PyUnicode_Format(PyObject *format, PyObject *args)
goto onError; goto onError;
assert (PyUnicode_Check(temp)); assert (PyUnicode_Check(temp));
if (width == -1 && prec == -1 if (PyUnicode_READY(temp) == -1) {
Py_CLEAR(temp);
goto onError;
}
len = PyUnicode_GET_LENGTH(temp);
if ((width == -1 || width <= len)
&& (prec == -1 || prec >= len)
&& !(flags & (F_SIGN | F_BLANK))) && !(flags & (F_SIGN | F_BLANK)))
{ {
/* Fast path */ /* Fast path */
...@@ -13812,20 +13852,14 @@ PyUnicode_Format(PyObject *format, PyObject *args) ...@@ -13812,20 +13852,14 @@ PyUnicode_Format(PyObject *format, PyObject *args)
goto nextarg; goto nextarg;
} }
if (PyUnicode_READY(temp) == -1) {
Py_CLEAR(temp);
goto onError;
}
kind = PyUnicode_KIND(temp);
pbuf = PyUnicode_DATA(temp);
len = PyUnicode_GET_LENGTH(temp);
if (c == 's' || c == 'r' || c == 'a') { if (c == 's' || c == 'r' || c == 'a') {
if (prec >= 0 && len > prec) if (prec >= 0 && len > prec)
len = prec; len = prec;
} }
/* pbuf is initialized here. */ /* pbuf is initialized here. */
kind = PyUnicode_KIND(temp);
pbuf = PyUnicode_DATA(temp);
pindex = 0; pindex = 0;
if (sign) { if (sign) {
Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex); Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex);
......
...@@ -757,7 +757,8 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format, ...@@ -757,7 +757,8 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format,
goto done; goto done;
} }
if (format->width == -1 && format->precision == -1) { if ((format->width == -1 || format->width <= len)
&& (format->precision == -1 || format->precision >= len)) {
/* Fast path */ /* Fast path */
return _PyUnicodeWriter_WriteStr(writer, value); return _PyUnicodeWriter_WriteStr(writer, value);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment