Unverified commit 59423e3d, authored by Victor Stinner, committed by GitHub

bpo-33954: Fix _PyUnicode_InsertThousandsGrouping() (GH-10623)

Fix str.format(), float.__format__() and complex.__format__() methods
for non-ASCII decimal point when using the "n" formatter.

Changes:

* Rewrite _PyUnicode_InsertThousandsGrouping(): it now requires
  a _PyUnicodeWriter object for the buffer and a Python str object
  for digits.
* Rename FILL() macro to unicode_fill(), convert it to static inline function,
  add "assert(0 <= start);" and rework its code.
parent df108dc6
...@@ -2135,10 +2135,10 @@ PyAPI_FUNC(PyObject *) _PyUnicode_XStrip( ...@@ -2135,10 +2135,10 @@ PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
see Objects/stringlib/localeutil.h */ see Objects/stringlib/localeutil.h */
#ifndef Py_LIMITED_API #ifndef Py_LIMITED_API
PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping( PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
PyObject *unicode, _PyUnicodeWriter *writer,
Py_ssize_t index,
Py_ssize_t n_buffer, Py_ssize_t n_buffer,
void *digits, PyObject *digits,
Py_ssize_t d_pos,
Py_ssize_t n_digits, Py_ssize_t n_digits,
Py_ssize_t min_width, Py_ssize_t min_width,
const char *grouping, const char *grouping,
......
Fix :meth:`str.format`, :meth:`float.__format__` and
:meth:`complex.__format__` methods for non-ASCII decimal point when using
the "n" formatter.
/* stringlib: locale related helpers implementation */ /* _PyUnicode_InsertThousandsGrouping() helper functions */
#include <locale.h>
#if !STRINGLIB_IS_UNICODE
# error "localeutil.h is specific to Unicode"
#endif
typedef struct { typedef struct {
const char *grouping; const char *grouping;
char previous; char previous;
Py_ssize_t i; /* Where we're currently pointing in grouping. */ Py_ssize_t i; /* Where we're currently pointing in grouping. */
} STRINGLIB(GroupGenerator); } GroupGenerator;
static void static void
STRINGLIB(GroupGenerator_init)(STRINGLIB(GroupGenerator) *self, const char *grouping) GroupGenerator_init(GroupGenerator *self, const char *grouping)
{ {
self->grouping = grouping; self->grouping = grouping;
self->i = 0; self->i = 0;
self->previous = 0; self->previous = 0;
} }
/* Returns the next grouping, or 0 to signify end. */ /* Returns the next grouping, or 0 to signify end. */
static Py_ssize_t static Py_ssize_t
STRINGLIB(GroupGenerator_next)(STRINGLIB(GroupGenerator) *self) GroupGenerator_next(GroupGenerator *self)
{ {
/* Note that we don't really do much error checking here. If a /* Note that we don't really do much error checking here. If a
grouping string contains just CHAR_MAX, for example, then just grouping string contains just CHAR_MAX, for example, then just
...@@ -43,138 +39,44 @@ STRINGLIB(GroupGenerator_next)(STRINGLIB(GroupGenerator) *self) ...@@ -43,138 +39,44 @@ STRINGLIB(GroupGenerator_next)(STRINGLIB(GroupGenerator) *self)
} }
} }
/* Fill in some digits, leading zeros, and thousands separator. All /* Fill in some digits, leading zeros, and thousands separator. All
are optional, depending on when we're called. */ are optional, depending on when we're called. */
static void static void
STRINGLIB(fill)(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end, InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos,
Py_ssize_t n_chars, Py_ssize_t n_zeros, STRINGLIB_CHAR* thousands_sep, PyObject *digits, Py_ssize_t *digits_pos,
Py_ssize_t thousands_sep_len) Py_ssize_t n_chars, Py_ssize_t n_zeros,
PyObject *thousands_sep, Py_ssize_t thousands_sep_len,
Py_UCS4 *maxchar)
{ {
Py_ssize_t i; if (!writer) {
/* if maxchar > 127, maxchar is already set */
if (*maxchar == 127 && thousands_sep) {
Py_UCS4 maxchar2 = PyUnicode_MAX_CHAR_VALUE(thousands_sep);
*maxchar = Py_MAX(*maxchar, maxchar2);
}
return;
}
if (thousands_sep) { if (thousands_sep) {
*buffer_end -= thousands_sep_len; *buffer_pos -= thousands_sep_len;
/* Copy the thousands_sep chars into the buffer. */ /* Copy the thousands_sep chars into the buffer. */
memcpy(*buffer_end, thousands_sep, _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
thousands_sep_len * STRINGLIB_SIZEOF_CHAR); thousands_sep, 0,
} thousands_sep_len);
*buffer_end -= n_chars;
*digits_end -= n_chars;
memcpy(*buffer_end, *digits_end, n_chars * sizeof(STRINGLIB_CHAR));
*buffer_end -= n_zeros;
for (i = 0; i < n_zeros; i++)
(*buffer_end)[i] = '0';
}
/**
* InsertThousandsGrouping:
* @buffer: A pointer to the start of a string.
* @n_buffer: Number of characters in @buffer.
* @digits: A pointer to the digits we're reading from. If count
* is non-NULL, this is unused.
* @n_digits: The number of digits in the string, in which we want
* to put the grouping chars.
* @min_width: The minimum width of the digits in the output string.
* Output will be zero-padded on the left to fill.
* @grouping: see definition in localeconv().
* @thousands_sep: see definition in localeconv().
*
* There are 2 modes: counting and filling. If @buffer is NULL,
* we are in counting mode, else filling mode.
* If counting, the required buffer size is returned.
* If filling, we know the buffer will be large enough, so we don't
* need to pass in the buffer size.
* Inserts thousand grouping characters (as defined by grouping and
* thousands_sep) into the string between buffer and buffer+n_digits.
*
* Return value: 0 on error, else 1. Note that no error can occur if
* count is non-NULL.
*
* This name won't be used, the includer of this file should define
* it to be the actual function name, based on unicode or string.
*
* As closely as possible, this code mimics the logic in decimal.py's
_insert_thousands_sep().
**/
static Py_ssize_t
STRINGLIB(InsertThousandsGrouping)(
STRINGLIB_CHAR *buffer,
Py_ssize_t n_buffer,
STRINGLIB_CHAR *digits,
Py_ssize_t n_digits,
Py_ssize_t min_width,
const char *grouping,
STRINGLIB_CHAR *thousands_sep,
Py_ssize_t thousands_sep_len)
{
Py_ssize_t count = 0;
Py_ssize_t n_zeros;
int loop_broken = 0;
int use_separator = 0; /* First time through, don't append the
separator. They only go between
groups. */
STRINGLIB_CHAR *buffer_end = NULL;
STRINGLIB_CHAR *digits_end = NULL;
Py_ssize_t l;
Py_ssize_t n_chars;
Py_ssize_t remaining = n_digits; /* Number of chars remaining to
be looked at */
/* A generator that returns all of the grouping widths, until it
returns 0. */
STRINGLIB(GroupGenerator) groupgen;
STRINGLIB(GroupGenerator_init)(&groupgen, grouping);
if (buffer) {
buffer_end = buffer + n_buffer;
digits_end = digits + n_digits;
}
while ((l = STRINGLIB(GroupGenerator_next)(&groupgen)) > 0) {
l = Py_MIN(l, Py_MAX(Py_MAX(remaining, min_width), 1));
n_zeros = Py_MAX(0, l - remaining);
n_chars = Py_MAX(0, Py_MIN(remaining, l));
/* Use n_zero zero's and n_chars chars */
/* Count only, don't do anything. */
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
if (buffer) {
/* Copy into the output buffer. */
STRINGLIB(fill)(&digits_end, &buffer_end, n_chars, n_zeros,
use_separator ? thousands_sep : NULL, thousands_sep_len);
}
/* Use a separator next time. */
use_separator = 1;
remaining -= n_chars;
min_width -= l;
if (remaining <= 0 && min_width <= 0) {
loop_broken = 1;
break;
}
min_width -= thousands_sep_len;
} }
if (!loop_broken) {
/* We left the loop without using a break statement. */
l = Py_MAX(Py_MAX(remaining, min_width), 1); *buffer_pos -= n_chars;
n_zeros = Py_MAX(0, l - remaining); *digits_pos -= n_chars;
n_chars = Py_MAX(0, Py_MIN(remaining, l)); _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
digits, *digits_pos,
/* Use n_zero zero's and n_chars chars */ n_chars);
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
if (buffer) { if (n_zeros) {
/* Copy into the output buffer. */ *buffer_pos -= n_zeros;
STRINGLIB(fill)(&digits_end, &buffer_end, n_chars, n_zeros, enum PyUnicode_Kind kind = PyUnicode_KIND(writer->buffer);
use_separator ? thousands_sep : NULL, thousands_sep_len); void *data = PyUnicode_DATA(writer->buffer);
} unicode_fill(kind, data, '0', *buffer_pos, n_zeros);
} }
return count;
} }
This diff is collapsed.
...@@ -462,7 +462,8 @@ parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end, ...@@ -462,7 +462,8 @@ parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
/* not all fields of format are used. for example, precision is /* not all fields of format are used. for example, precision is
unused. should this take discrete params in order to be more clear unused. should this take discrete params in order to be more clear
about what it does? or is passing a single format parameter easier about what it does? or is passing a single format parameter easier
and more efficient enough to justify a little obfuscation? */ and more efficient enough to justify a little obfuscation?
Return -1 on error. */
static Py_ssize_t static Py_ssize_t
calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start, Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
...@@ -541,9 +542,12 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, ...@@ -541,9 +542,12 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
Py_UCS4 grouping_maxchar; Py_UCS4 grouping_maxchar;
spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping( spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
NULL, 0, NULL, 0,
0, NULL, NULL, 0, spec->n_digits,
spec->n_digits, spec->n_min_width, spec->n_min_width,
locale->grouping, locale->thousands_sep, &grouping_maxchar); locale->grouping, locale->thousands_sep, &grouping_maxchar);
if (spec->n_grouped_digits == -1) {
return -1;
}
*maxchar = Py_MAX(*maxchar, grouping_maxchar); *maxchar = Py_MAX(*maxchar, grouping_maxchar);
} }
...@@ -635,26 +639,14 @@ fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec, ...@@ -635,26 +639,14 @@ fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
/* Only for type 'c' special case, it has no digits. */ /* Only for type 'c' special case, it has no digits. */
if (spec->n_digits != 0) { if (spec->n_digits != 0) {
/* Fill the digits with InsertThousandsGrouping. */ /* Fill the digits with InsertThousandsGrouping. */
char *pdigits;
if (PyUnicode_READY(digits))
return -1;
pdigits = PyUnicode_DATA(digits);
if (PyUnicode_KIND(digits) < kind) {
pdigits = _PyUnicode_AsKind(digits, kind);
if (pdigits == NULL)
return -1;
}
r = _PyUnicode_InsertThousandsGrouping( r = _PyUnicode_InsertThousandsGrouping(
writer->buffer, writer->pos, writer, spec->n_grouped_digits,
spec->n_grouped_digits, digits, d_pos, spec->n_digits,
pdigits + kind * d_pos, spec->n_min_width,
spec->n_digits, spec->n_min_width,
locale->grouping, locale->thousands_sep, NULL); locale->grouping, locale->thousands_sep, NULL);
if (r == -1) if (r == -1)
return -1; return -1;
assert(r == spec->n_grouped_digits); assert(r == spec->n_grouped_digits);
if (PyUnicode_KIND(digits) < kind)
PyMem_Free(pdigits);
d_pos += spec->n_digits; d_pos += spec->n_digits;
} }
if (toupper) { if (toupper) {
...@@ -994,6 +986,9 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, ...@@ -994,6 +986,9 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format,
n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars, n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
inumeric_chars + n_digits, n_remainder, 0, inumeric_chars + n_digits, n_remainder, 0,
&locale, format, &maxchar); &locale, format, &maxchar);
if (n_total == -1) {
goto done;
}
/* Allocate the memory. */ /* Allocate the memory. */
if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1) if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
...@@ -1139,6 +1134,9 @@ format_float_internal(PyObject *value, ...@@ -1139,6 +1134,9 @@ format_float_internal(PyObject *value,
n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index, n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
index + n_digits, n_remainder, has_decimal, index + n_digits, n_remainder, has_decimal,
&locale, format, &maxchar); &locale, format, &maxchar);
if (n_total == -1) {
goto done;
}
/* Allocate the memory. */ /* Allocate the memory. */
if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1) if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
...@@ -1322,6 +1320,9 @@ format_complex_internal(PyObject *value, ...@@ -1322,6 +1320,9 @@ format_complex_internal(PyObject *value,
i_re, i_re + n_re_digits, n_re_remainder, i_re, i_re + n_re_digits, n_re_remainder,
re_has_decimal, &locale, &tmp_format, re_has_decimal, &locale, &tmp_format,
&maxchar); &maxchar);
if (n_re_total == -1) {
goto done;
}
/* Same formatting, but always include a sign, unless the real part is /* Same formatting, but always include a sign, unless the real part is
* going to be omitted, in which case we use whatever sign convention was * going to be omitted, in which case we use whatever sign convention was
...@@ -1332,6 +1333,9 @@ format_complex_internal(PyObject *value, ...@@ -1332,6 +1333,9 @@ format_complex_internal(PyObject *value,
i_im, i_im + n_im_digits, n_im_remainder, i_im, i_im + n_im_digits, n_im_remainder,
im_has_decimal, &locale, &tmp_format, im_has_decimal, &locale, &tmp_format,
&maxchar); &maxchar);
if (n_im_total == -1) {
goto done;
}
if (skip_re) if (skip_re)
n_re_total = 0; n_re_total = 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment