Kaydet (Commit) e215d960 authored tarafından Victor Stinner's avatar Victor Stinner

Issue #16147: Rewrite PyUnicode_FromFormatV() to use _PyUnicodeWriter API

 * Simplify the code: replace 4 steps with one unique step using the
   _PyUnicodeWriter API. PyUnicode_Format() has the same design. It avoids to
   store intermediate results which require to allocate an array of pointers on
   the heap.
 * Use the _PyUnicodeWriter API for speed (and its convinient API):
   overallocate the buffer to reduce the number of "realloc()"
 * Implement "width" and "precision" in Python, don't rely on sprintf(). It
   avoids to need of a temporary buffer allocated on the heap: only use a small
   buffer allocated in the stack.
 * Add _PyUnicodeWriter_WriteCstr() function
 * Split PyUnicode_FromFormatV() into two functions: add
   unicode_fromformat_arg().
 * Inline parse_format_flags(): the format of an argument is now only parsed
   once, it's no more needed to have a subfunction.
 * Optimize PyUnicode_FromFormatV() for characters between two "%" arguments:
   search the next "%" and copy the substring in one chunk, instead of copying
   character per character.
üst 2a09b6e8
......@@ -933,12 +933,28 @@ PyAPI_FUNC(int)
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
Py_ssize_t length, Py_UCS4 maxchar);
/* Append a Unicode string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str);
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
PyObject *str /* Unicode string */
);
/* Append a latin1-encoded byte string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteCstr(_PyUnicodeWriter *writer,
const char *str, /* latin1-encoded byte string */
Py_ssize_t len /* length in bytes */
);
/* Get the value of the write as an Unicode string. Clear the
buffer of the writer. Raise an exception and return NULL
on error. */
PyAPI_FUNC(PyObject *)
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
/* Deallocate memory of a writer (clear its internal buffer). */
PyAPI_FUNC(void)
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
#endif
......
......@@ -1769,6 +1769,22 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual(PyUnicode_FromFormat(b'%llu', c_ulonglong(123)), '123')
self.assertEqual(PyUnicode_FromFormat(b'%zu', c_size_t(123)), '123')
# test padding (width and/or precision)
self.assertEqual(PyUnicode_FromFormat(b'%010i', c_int(123)), '123'.rjust(10, '0'))
self.assertEqual(PyUnicode_FromFormat(b'%100i', c_int(123)), '123'.rjust(100))
self.assertEqual(PyUnicode_FromFormat(b'%.100i', c_int(123)), '123'.rjust(100, '0'))
self.assertEqual(PyUnicode_FromFormat(b'%100.80i', c_int(123)), '123'.rjust(80, '0').rjust(100))
self.assertEqual(PyUnicode_FromFormat(b'%010u', c_uint(123)), '123'.rjust(10, '0'))
self.assertEqual(PyUnicode_FromFormat(b'%100u', c_uint(123)), '123'.rjust(100))
self.assertEqual(PyUnicode_FromFormat(b'%.100u', c_uint(123)), '123'.rjust(100, '0'))
self.assertEqual(PyUnicode_FromFormat(b'%100.80u', c_uint(123)), '123'.rjust(80, '0').rjust(100))
self.assertEqual(PyUnicode_FromFormat(b'%010x', c_int(0x123)), '123'.rjust(10, '0'))
self.assertEqual(PyUnicode_FromFormat(b'%100x', c_int(0x123)), '123'.rjust(100))
self.assertEqual(PyUnicode_FromFormat(b'%.100x', c_int(0x123)), '123'.rjust(100, '0'))
self.assertEqual(PyUnicode_FromFormat(b'%100.80x', c_int(0x123)), '123'.rjust(80, '0').rjust(100))
# test %A
text = PyUnicode_FromFormat(b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
self.assertEqual(text, r"%A:'abc\xe9\uabcd\U0010ffff'")
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment