Commit aca19e6a, authored by Eric Smith

Backport of some of the work in r71665 to trunk. This reworks much of
int, long, and float __format__(), and it keeps their implementation
in sync with py3k.

Also added PyOS_double_to_string. This is the "fallback" version
that's also available in trunk, and should be kept in sync with that
code. I'll add an issue to document PyOS_double_to_string in the C
API.

There are many internal cleanups. Externally visible changes include:

- Implement PEP 378, Format Specifier for Thousands Separator, for
  floats, ints, and longs.

- Issue #5515: 'n' formatting for ints, longs, and floats handles
  leading zero formatting poorly.

- Issue #5772: For float.__format__, don't add a trailing ".0" if
  we're using no type code and we have an exponent.
üst cbb53087
......@@ -10,6 +10,25 @@ PyAPI_FUNC(double) PyOS_ascii_strtod(const char *str, char **ptr);
PyAPI_FUNC(double) PyOS_ascii_atof(const char *str);
PyAPI_FUNC(char *) PyOS_ascii_formatd(char *buffer, size_t buf_len, const char *format, double d);
/* The caller is responsible for calling PyMem_Free to free the buffer
that is returned. */
PyAPI_FUNC(char *) PyOS_double_to_string(double val,
char format_code,
int precision,
int flags,
int *type);
/* PyOS_double_to_string's "flags" parameter can be set to 0 or more of: */
#define Py_DTSF_SIGN 0x01 /* always add the sign */
#define Py_DTSF_ADD_DOT_0 0x02 /* if the result is an integer add ".0" */
#define Py_DTSF_ALT 0x04 /* "alternate" formatting. it's format_code
specific */
/* PyOS_double_to_string's "type", if non-NULL, will be set to one of: */
#define Py_DTST_FINITE 0
#define Py_DTST_INFINITE 1
#define Py_DTST_NAN 2
#ifdef __cplusplus
}
......
......@@ -177,16 +177,26 @@ PyAPI_FUNC(int) PyString_AsStringAndSize(
strings) */
);
/* Using the current locale, insert the thousands grouping
into the string pointed to by buffer. For the argument descriptions,
see Objects/stringlib/localeutil.h */
PyAPI_FUNC(Py_ssize_t) _PyString_InsertThousandsGroupingLocale(char *buffer,
Py_ssize_t n_buffer,
char *digits,
Py_ssize_t n_digits,
Py_ssize_t min_width);
PyAPI_FUNC(int) _PyString_InsertThousandsGrouping(char *buffer,
Py_ssize_t n_buffer,
Py_ssize_t n_digits,
Py_ssize_t buf_size,
Py_ssize_t *count,
int append_zero_char);
/* Using explicit passed-in values, insert the thousands grouping
into the string pointed to by buffer. For the argument descriptions,
see Objects/stringlib/localeutil.h */
PyAPI_FUNC(Py_ssize_t) _PyString_InsertThousandsGrouping(char *buffer,
Py_ssize_t n_buffer,
char *digits,
Py_ssize_t n_digits,
Py_ssize_t min_width,
const char *grouping,
const char *thousands_sep);
/* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */
......
......@@ -232,6 +232,10 @@ class FormatTest(unittest.TestCase):
testboth("%o", -042L, "-42")
testboth("%o", float(042), "42")
# alternate float formatting
testformat('%g', 1.1, '1.1')
testformat('%#g', 1.1, '1.10000')
# Test exception for unknown format characters
if verbose:
print 'Testing exceptions'
......
......@@ -113,6 +113,9 @@ class TypesTests(unittest.TestCase):
self.assertEqual(1.5e-101.__format__('e'), '1.500000e-101')
self.assertEqual('%e' % 1.5e-101, '1.500000e-101')
self.assertEqual('%g' % 1.0, '1')
self.assertEqual('%#g' % 1.0, '1.00000')
def test_normal_integers(self):
# Ensure the first 256 integers are shared
a = 256
......@@ -412,6 +415,9 @@ class TypesTests(unittest.TestCase):
self.assertRaises(TypeError, 3 .__format__, None)
self.assertRaises(TypeError, 3 .__format__, 0)
# can't have ',' with 'c'
self.assertRaises(ValueError, 3 .__format__, ",c")
# ensure that only int and float type specifiers work
for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] +
[chr(x) for x in range(ord('A'), ord('Z')+1)]):
......@@ -609,11 +615,37 @@ class TypesTests(unittest.TestCase):
# a totally empty format specifier means something else.
# So, just use a sign flag
test(1e200, '+g', '+1e+200')
test(1e200, '+', '+1.0e+200')
test(1e200, '+', '+1e+200')
test(1.1e200, '+g', '+1.1e+200')
test(1.1e200, '+', '+1.1e+200')
test(1.1e200, '+g', '+1.1e+200')
test(1.1e200, '+', '+1.1e+200')
# % formatting
# 0 padding
test(1234., '010f', '1234.000000')
test(1234., '011f', '1234.000000')
test(1234., '012f', '01234.000000')
test(-1234., '011f', '-1234.000000')
test(-1234., '012f', '-1234.000000')
test(-1234., '013f', '-01234.000000')
test(-1234.12341234, '013f', '-01234.123412')
test(-123456.12341234, '011.2f', '-0123456.12')
# 0 padding with commas
test(1234., '011,f', '1,234.000000')
test(1234., '012,f', '1,234.000000')
test(1234., '013,f', '01,234.000000')
test(-1234., '012,f', '-1,234.000000')
test(-1234., '013,f', '-1,234.000000')
test(-1234., '014,f', '-01,234.000000')
test(-12345., '015,f', '-012,345.000000')
test(-123456., '016,f', '-0,123,456.000000')
test(-123456., '017,f', '-0,123,456.000000')
test(-123456.12341234, '017,f', '-0,123,456.123412')
test(-123456.12341234, '013,.2f', '-0,123,456.12')
# % formatting
test(-1.0, '%', '-100.000000%')
# format spec must be string
......@@ -637,6 +669,24 @@ class TypesTests(unittest.TestCase):
self.assertRaises(ValueError, format, 0.0, '#')
self.assertRaises(ValueError, format, 0.0, '#20f')
def test_format_spec_errors(self):
    # int, float, and string all share the same format spec
    # mini-language parser, so exercising it through int is enough.
    huge_number = '1' * 10000

    # A width with too many digits must be rejected. This is
    # probably a CPython specific test: the parser tries to put
    # the width into a C long.
    self.assertRaises(ValueError, format, 0, huge_number + 'd')

    # The same applies to the precision.
    self.assertRaises(ValueError, format, 0, '.' + huge_number + 'd')

    # And to an oversized precision combined with a large width.
    self.assertRaises(ValueError, format, 0,
                      '1'*1000 + '.' + huge_number + 'd')

    # The ',' thousands separator is not allowed with these type codes.
    for type_code in 'xXobns':
        self.assertRaises(ValueError, format, 0, ',' + type_code)
def test_main():
run_unittest(TypesTests)
......
......@@ -12,6 +12,15 @@ What's New in Python 2.7 alpha 1
Core and Builtins
-----------------
- Implement PEP 378, Format Specifier for Thousands Separator, for
floats, ints, and longs.
- Issue #5515: 'n' formatting for ints, longs, and floats handles
leading zero formatting poorly.
- Issue #5772: For float.__format__, don't add a trailing ".0" if
we're using no type code and we have an exponent.
- Issue #3166: Make long -> float (and int -> float) conversions
correctly rounded.
......
This diff is collapsed.
This diff is collapsed.
......@@ -6,6 +6,15 @@
compiled as unicode. */
#define STRINGLIB_IS_UNICODE 0
/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
/* This needs to be cleaned up. See issue 5793. */
#ifndef _tolower
#define _tolower tolower
#endif
#ifndef _toupper
#define _toupper toupper
#endif
#define STRINGLIB_OBJECT PyStringObject
#define STRINGLIB_CHAR char
#define STRINGLIB_TYPE_NAME "string"
......@@ -13,8 +22,8 @@
#define STRINGLIB_EMPTY nullstring
#define STRINGLIB_ISDECIMAL(x) ((x >= '0') && (x <= '9'))
#define STRINGLIB_TODECIMAL(x) (STRINGLIB_ISDECIMAL(x) ? (x - '0') : -1)
#define STRINGLIB_TOUPPER toupper
#define STRINGLIB_TOLOWER tolower
#define STRINGLIB_TOUPPER(x) _toupper(Py_CHARMASK(x))
#define STRINGLIB_TOLOWER(x) _tolower(Py_CHARMASK(x))
#define STRINGLIB_FILL memset
#define STRINGLIB_STR PyString_AS_STRING
#define STRINGLIB_LEN PyString_GET_SIZE
......@@ -24,5 +33,6 @@
#define STRINGLIB_CMP memcmp
#define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_GROUPING _PyString_InsertThousandsGrouping
#define STRINGLIB_GROUPING_LOCALE _PyString_InsertThousandsGroupingLocale
#endif /* !STRINGLIB_STRINGDEFS_H */
......@@ -37,6 +37,15 @@
*
* Return value: the #gdouble value.
**/
/*
Use system strtod; since strtod is locale aware, we may
have to first fix the decimal separator.
Note that unlike _Py_dg_strtod, the system strtod may not always give
correctly rounded results.
*/
double
PyOS_ascii_strtod(const char *nptr, char **endptr)
{
......@@ -187,6 +196,13 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
return val;
}
/* Convert the string nptr to a double. This is a thin wrapper that
calls PyOS_ascii_strtod with a NULL end pointer, so the caller gets no
indication of where parsing stopped. */
double
PyOS_ascii_atof(const char *nptr)
{
return PyOS_ascii_strtod(nptr, NULL);
}
/* Given a string that may have a decimal point in the current
locale, change it back to a dot. Since the string cannot get
longer, no need for a maximum buffer size parameter. */
......@@ -292,8 +308,9 @@ ensure_minumim_exponent_length(char* buffer, size_t buf_size)
}
}
/* Ensure that buffer has a decimal point in it. The decimal point
will not be in the current locale, it will always be '.' */
/* Ensure that buffer has a decimal point in it. The decimal point will not
be in the current locale, it will always be '.'. Don't add a decimal if an
exponent is present. */
Py_LOCAL_INLINE(void)
ensure_decimal_point(char* buffer, size_t buf_size)
{
......@@ -322,7 +339,8 @@ ensure_decimal_point(char* buffer, size_t buf_size)
insert_count = 1;
}
}
else {
else if (!(*p == 'e' || *p == 'E')) {
/* Don't add ".0" if we have an exponent. */
chars_to_insert = ".0";
insert_count = 2;
}
......@@ -341,37 +359,6 @@ ensure_decimal_point(char* buffer, size_t buf_size)
}
}
/* Add the locale specific grouping characters to buffer. Note
that any decimal point (if it's present) in buffer is already
locale-specific. Return 0 on error, else 1.

buffer: NUL-terminated string, modified in place; its current length
is taken with strlen().
buf_size: forwarded unchanged as the buf_size argument of
_PyString_InsertThousandsGrouping -- presumably the total capacity of
buffer in bytes; confirm against the callers.

The return value is whatever _PyString_InsertThousandsGrouping
returns. */
Py_LOCAL_INLINE(int)
add_thousands_grouping(char* buffer, size_t buf_size)
{
Py_ssize_t len = strlen(buffer);
/* The decimal point to search for comes from the current locale. */
struct lconv *locale_data = localeconv();
const char *decimal_point = locale_data->decimal_point;
/* Find the decimal point, if any. We're only concerned
about the characters to the left of the decimal when
adding grouping. */
char *p = strstr(buffer, decimal_point);
if (!p) {
/* No decimal point was found. */
/* If any exponent, adjust p so grouping stops before it. */
p = strpbrk(buffer, "eE");
if (!p)
/* No exponent and no decimal. Use the entire
string. */
p = buffer + len;
}
/* At this point, p points just past the right-most character we
want to format. We need to add the grouping string for the
characters between buffer and p. */
/* Arguments: whole-string length, count of leading characters to
group (p-buffer), buffer capacity, NULL for the count output, and 1
for append_zero_char. */
return _PyString_InsertThousandsGrouping(buffer, len, p-buffer,
buf_size, NULL, 1);
}
/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120
......@@ -386,9 +373,8 @@ add_thousands_grouping(char* buffer, size_t buf_size)
* Converts a #gdouble to a string, using the '.' as
* decimal point. To format the number you pass in
* a printf()-style format string. Allowed conversion
* specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'n'.
* specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
*
* 'n' is the same as 'g', except it uses the current locale.
* 'Z' is the same as 'g', except it always has a decimal and
* at least one digit after the decimal.
*
......@@ -403,11 +389,6 @@ PyOS_ascii_formatd(char *buffer,
char format_char;
size_t format_len = strlen(format);
/* For type 'n', we need to make a copy of the format string, because
we're going to modify 'n' -> 'g', and format is const char*, so we
can't modify it directly. FLOAT_FORMATBUFLEN should be longer than
we ever need this to be. There's an upcoming check to ensure it's
big enough. */
/* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but
also with at least one character past the decimal. */
char tmp_format[FLOAT_FORMATBUFLEN];
......@@ -433,12 +414,12 @@ PyOS_ascii_formatd(char *buffer,
if (!(format_char == 'e' || format_char == 'E' ||
format_char == 'f' || format_char == 'F' ||
format_char == 'g' || format_char == 'G' ||
format_char == 'n' || format_char == 'Z'))
format_char == 'Z'))
return NULL;
/* Map 'n' or 'Z' format_char to 'g', by copying the format string and
/* Map 'Z' format_char to 'g', by copying the format string and
replacing the final char with a 'g' */
if (format_char == 'n' || format_char == 'Z') {
if (format_char == 'Z') {
if (format_len + 1 >= sizeof(tmp_format)) {
/* The format won't fit in our copy. Error out. In
practice, this will never happen and will be
......@@ -457,11 +438,8 @@ PyOS_ascii_formatd(char *buffer,
/* Do various fixups on the return string */
/* Get the current locale, and find the decimal point string.
Convert that string back to a dot. Do not do this if using the
'n' (number) format code, since we want to keep the localized
decimal point in that case. */
if (format_char != 'n')
change_decimal_from_locale_to_dot(buffer);
Convert that string back to a dot. */
change_decimal_from_locale_to_dot(buffer);
/* If an exponent exists, ensure that the exponent is at least
MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
......@@ -475,16 +453,111 @@ PyOS_ascii_formatd(char *buffer,
if (format_char == 'Z')
ensure_decimal_point(buffer, buf_size);
/* If format_char is 'n', add the thousands grouping. */
if (format_char == 'n')
if (!add_thousands_grouping(buffer, buf_size))
return NULL;
return buffer;
}
double
PyOS_ascii_atof(const char *nptr)
PyAPI_FUNC(char *) PyOS_double_to_string(double val,
char format_code,
int precision,
int flags,
int *type)
{
return PyOS_ascii_strtod(nptr, NULL);
char buf[128];
char format[32];
Py_ssize_t len;
char *result;
char *p;
int t;
int upper = 0;
/* Validate format_code, and map upper and lower case */
switch (format_code) {
case 'e': /* exponent */
case 'f': /* fixed */
case 'g': /* general */
break;
case 'E':
upper = 1;
format_code = 'e';
break;
case 'F':
upper = 1;
format_code = 'f';
break;
case 'G':
upper = 1;
format_code = 'g';
break;
case 'r': /* repr format */
/* Supplied precision is unused, must be 0. */
if (precision != 0) {
PyErr_BadInternalCall();
return NULL;
}
precision = 17;
format_code = 'g';
break;
case 's': /* str format */
/* Supplied precision is unused, must be 0. */
if (precision != 0) {
PyErr_BadInternalCall();
return NULL;
}
precision = 12;
format_code = 'g';
break;
default:
PyErr_BadInternalCall();
return NULL;
}
/* Handle nan and inf. */
if (Py_IS_NAN(val)) {
strcpy(buf, "nan");
t = Py_DTST_NAN;
} else if (Py_IS_INFINITY(val)) {
if (copysign(1., val) == 1.)
strcpy(buf, "inf");
else
strcpy(buf, "-inf");
t = Py_DTST_INFINITE;
} else {
t = Py_DTST_FINITE;
if (flags & Py_DTSF_ADD_DOT_0)
format_code = 'Z';
PyOS_snprintf(format, 32, "%%%s.%i%c", (flags & Py_DTSF_ALT ? "#" : ""), precision, format_code);
PyOS_ascii_formatd(buf, sizeof(buf), format, val);
}
len = strlen(buf);
/* Add 1 for the trailing 0 byte.
Add 1 because we might need to make room for the sign.
*/
result = PyMem_Malloc(len + 2);
if (result == NULL) {
PyErr_NoMemory();
return NULL;
}
p = result;
/* Never add sign for nan/inf, even if asked. */
if (flags & Py_DTSF_SIGN && buf[0] != '-' && t == Py_DTST_FINITE)
*p++ = '+';
strcpy(p, buf);
if (upper) {
/* Convert to upper case. */
char *p1;
for (p1 = p; *p1; p1++)
*p1 = toupper(*p1);
}
if (type)
*type = t;
return result;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment