Kaydet (Commit) aca19e6a authored tarafından Eric Smith's avatar Eric Smith

Backport of some of the work in r71665 to trunk. This reworks much of
int, long, and float __format__(), and it keeps their implementation
in sync with py3k.

Also added PyOS_double_to_string. This is the "fallback" version
that's also available in trunk, and should be kept in sync with that
code. I'll add an issue to document PyOS_double_to_string in the C
API.

There are many internal cleanups. Externally visible changes include:

- Implement PEP 378, Format Specifier for Thousands Separator, for
  floats, ints, and longs.

- Issue #5515: 'n' formatting for ints, longs, and floats handles
  leading zero formatting poorly.

- Issue #5772: For float.__format__, don't add a trailing ".0" if
  we're using no type code and we have an exponent.
üst cbb53087
...@@ -10,6 +10,25 @@ PyAPI_FUNC(double) PyOS_ascii_strtod(const char *str, char **ptr); ...@@ -10,6 +10,25 @@ PyAPI_FUNC(double) PyOS_ascii_strtod(const char *str, char **ptr);
PyAPI_FUNC(double) PyOS_ascii_atof(const char *str); PyAPI_FUNC(double) PyOS_ascii_atof(const char *str);
PyAPI_FUNC(char *) PyOS_ascii_formatd(char *buffer, size_t buf_len, const char *format, double d); PyAPI_FUNC(char *) PyOS_ascii_formatd(char *buffer, size_t buf_len, const char *format, double d);
/* Convert the double val to a string held in a newly allocated buffer.
The caller is responsible for calling PyMem_Free to free the buffer
that is returned. NULL is returned on failure.
format_code is one of 'e', 'E', 'f', 'F', 'g', 'G', 'r' or 's';
precision must be 0 when format_code is 'r' or 's'.
"flags" and "type" are described below. */
PyAPI_FUNC(char *) PyOS_double_to_string(double val,
char format_code,
int precision,
int flags,
int *type);
/* PyOS_double_to_string's "flags" parameter can be set to 0 or more of: */
#define Py_DTSF_SIGN 0x01 /* prefix '+' to finite results that lack a '-' */
#define Py_DTSF_ADD_DOT_0 0x02 /* if the result is an integer add ".0" */
#define Py_DTSF_ALT 0x04 /* "alternate" formatting. it's format_code
specific */
/* PyOS_double_to_string's "type", if non-NULL, will be set to one of: */
#define Py_DTST_FINITE 0
#define Py_DTST_INFINITE 1
#define Py_DTST_NAN 2
#ifdef __cplusplus #ifdef __cplusplus
} }
......
...@@ -177,16 +177,26 @@ PyAPI_FUNC(int) PyString_AsStringAndSize( ...@@ -177,16 +177,26 @@ PyAPI_FUNC(int) PyString_AsStringAndSize(
strings) */ strings) */
); );
/* Using the current locale, insert the thousands grouping /* Using the current locale, insert the thousands grouping
into the string pointed to by buffer. For the argument descriptions, into the string pointed to by buffer. For the argument descriptions,
see Objects/stringlib/localeutil.h */ see Objects/stringlib/localeutil.h */
PyAPI_FUNC(Py_ssize_t) _PyString_InsertThousandsGroupingLocale(char *buffer,
Py_ssize_t n_buffer,
char *digits,
Py_ssize_t n_digits,
Py_ssize_t min_width);
PyAPI_FUNC(int) _PyString_InsertThousandsGrouping(char *buffer, /* Using explicit passed-in values, insert the thousands grouping
into the string pointed to by buffer. For the argument descriptions,
see Objects/stringlib/localeutil.h */
PyAPI_FUNC(Py_ssize_t) _PyString_InsertThousandsGrouping(char *buffer,
Py_ssize_t n_buffer, Py_ssize_t n_buffer,
char *digits,
Py_ssize_t n_digits, Py_ssize_t n_digits,
Py_ssize_t buf_size, Py_ssize_t min_width,
Py_ssize_t *count, const char *grouping,
int append_zero_char); const char *thousands_sep);
/* Format the object based on the format_spec, as defined in PEP 3101 /* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */ (Advanced String Formatting). */
......
...@@ -232,6 +232,10 @@ class FormatTest(unittest.TestCase): ...@@ -232,6 +232,10 @@ class FormatTest(unittest.TestCase):
testboth("%o", -042L, "-42") testboth("%o", -042L, "-42")
testboth("%o", float(042), "42") testboth("%o", float(042), "42")
# alternate float formatting
testformat('%g', 1.1, '1.1')
testformat('%#g', 1.1, '1.10000')
# Test exception for unknown format characters # Test exception for unknown format characters
if verbose: if verbose:
print 'Testing exceptions' print 'Testing exceptions'
......
...@@ -113,6 +113,9 @@ class TypesTests(unittest.TestCase): ...@@ -113,6 +113,9 @@ class TypesTests(unittest.TestCase):
self.assertEqual(1.5e-101.__format__('e'), '1.500000e-101') self.assertEqual(1.5e-101.__format__('e'), '1.500000e-101')
self.assertEqual('%e' % 1.5e-101, '1.500000e-101') self.assertEqual('%e' % 1.5e-101, '1.500000e-101')
self.assertEqual('%g' % 1.0, '1')
self.assertEqual('%#g' % 1.0, '1.00000')
def test_normal_integers(self): def test_normal_integers(self):
# Ensure the first 256 integers are shared # Ensure the first 256 integers are shared
a = 256 a = 256
...@@ -412,6 +415,9 @@ class TypesTests(unittest.TestCase): ...@@ -412,6 +415,9 @@ class TypesTests(unittest.TestCase):
self.assertRaises(TypeError, 3 .__format__, None) self.assertRaises(TypeError, 3 .__format__, None)
self.assertRaises(TypeError, 3 .__format__, 0) self.assertRaises(TypeError, 3 .__format__, 0)
# can't have ',' with 'c'
self.assertRaises(ValueError, 3 .__format__, ",c")
# ensure that only int and float type specifiers work # ensure that only int and float type specifiers work
for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] + for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] +
[chr(x) for x in range(ord('A'), ord('Z')+1)]): [chr(x) for x in range(ord('A'), ord('Z')+1)]):
...@@ -609,10 +615,36 @@ class TypesTests(unittest.TestCase): ...@@ -609,10 +615,36 @@ class TypesTests(unittest.TestCase):
# a totally empty format specifier means something else. # a totally empty format specifier means something else.
# So, just use a sign flag # So, just use a sign flag
test(1e200, '+g', '+1e+200') test(1e200, '+g', '+1e+200')
test(1e200, '+', '+1.0e+200') test(1e200, '+', '+1e+200')
test(1.1e200, '+g', '+1.1e+200')
test(1.1e200, '+', '+1.1e+200')
test(1.1e200, '+g', '+1.1e+200') test(1.1e200, '+g', '+1.1e+200')
test(1.1e200, '+', '+1.1e+200') test(1.1e200, '+', '+1.1e+200')
# 0 padding
test(1234., '010f', '1234.000000')
test(1234., '011f', '1234.000000')
test(1234., '012f', '01234.000000')
test(-1234., '011f', '-1234.000000')
test(-1234., '012f', '-1234.000000')
test(-1234., '013f', '-01234.000000')
test(-1234.12341234, '013f', '-01234.123412')
test(-123456.12341234, '011.2f', '-0123456.12')
# 0 padding with commas
test(1234., '011,f', '1,234.000000')
test(1234., '012,f', '1,234.000000')
test(1234., '013,f', '01,234.000000')
test(-1234., '012,f', '-1,234.000000')
test(-1234., '013,f', '-1,234.000000')
test(-1234., '014,f', '-01,234.000000')
test(-12345., '015,f', '-012,345.000000')
test(-123456., '016,f', '-0,123,456.000000')
test(-123456., '017,f', '-0,123,456.000000')
test(-123456.12341234, '017,f', '-0,123,456.123412')
test(-123456.12341234, '013,.2f', '-0,123,456.12')
# % formatting # % formatting
test(-1.0, '%', '-100.000000%') test(-1.0, '%', '-100.000000%')
...@@ -637,6 +669,24 @@ class TypesTests(unittest.TestCase): ...@@ -637,6 +669,24 @@ class TypesTests(unittest.TestCase):
self.assertRaises(ValueError, format, 0.0, '#') self.assertRaises(ValueError, format, 0.0, '#')
self.assertRaises(ValueError, format, 0.0, '#20f') self.assertRaises(ValueError, format, 0.0, '#20f')
def test_format_spec_errors(self):
# int, float, and string all share the same format spec
# mini-language parser.
# Check that we can't ask for too many digits. This is
# probably a CPython specific test. It tries to put the width
# into a C long.
self.assertRaises(ValueError, format, 0, '1'*10000 + 'd')
# Similar with the precision.
self.assertRaises(ValueError, format, 0, '.' + '1'*10000 + 'd')
# And may as well test both.
self.assertRaises(ValueError, format, 0, '1'*1000 + '.' + '1'*10000 + 'd')
# Make sure commas aren't allowed with various type codes
for code in 'xXobns':
self.assertRaises(ValueError, format, 0, ',' + code)
def test_main(): def test_main():
run_unittest(TypesTests) run_unittest(TypesTests)
......
...@@ -12,6 +12,15 @@ What's New in Python 2.7 alpha 1 ...@@ -12,6 +12,15 @@ What's New in Python 2.7 alpha 1
Core and Builtins Core and Builtins
----------------- -----------------
- Implement PEP 378, Format Specifier for Thousands Separator, for
floats, ints, and longs.
- Issue #5515: 'n' formatting for ints, longs, and floats handles
leading zero formatting poorly.
- Issue #5772: For float.__format__, don't add a trailing ".0" if
we're using no type code and we have an exponent.
- Issue #3166: Make long -> float (and int -> float) conversions - Issue #3166: Make long -> float (and int -> float) conversions
correctly rounded. correctly rounded.
......
This diff is collapsed.
This diff is collapsed.
...@@ -6,6 +6,15 @@ ...@@ -6,6 +6,15 @@
compiled as unicode. */ compiled as unicode. */
#define STRINGLIB_IS_UNICODE 0 #define STRINGLIB_IS_UNICODE 0
/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
/* This needs to be cleaned up. See issue 5793. */
#ifndef _tolower
#define _tolower tolower
#endif
#ifndef _toupper
#define _toupper toupper
#endif
#define STRINGLIB_OBJECT PyStringObject #define STRINGLIB_OBJECT PyStringObject
#define STRINGLIB_CHAR char #define STRINGLIB_CHAR char
#define STRINGLIB_TYPE_NAME "string" #define STRINGLIB_TYPE_NAME "string"
...@@ -13,8 +22,8 @@ ...@@ -13,8 +22,8 @@
#define STRINGLIB_EMPTY nullstring #define STRINGLIB_EMPTY nullstring
#define STRINGLIB_ISDECIMAL(x) ((x >= '0') && (x <= '9')) #define STRINGLIB_ISDECIMAL(x) ((x >= '0') && (x <= '9'))
#define STRINGLIB_TODECIMAL(x) (STRINGLIB_ISDECIMAL(x) ? (x - '0') : -1) #define STRINGLIB_TODECIMAL(x) (STRINGLIB_ISDECIMAL(x) ? (x - '0') : -1)
#define STRINGLIB_TOUPPER toupper #define STRINGLIB_TOUPPER(x) _toupper(Py_CHARMASK(x))
#define STRINGLIB_TOLOWER tolower #define STRINGLIB_TOLOWER(x) _tolower(Py_CHARMASK(x))
#define STRINGLIB_FILL memset #define STRINGLIB_FILL memset
#define STRINGLIB_STR PyString_AS_STRING #define STRINGLIB_STR PyString_AS_STRING
#define STRINGLIB_LEN PyString_GET_SIZE #define STRINGLIB_LEN PyString_GET_SIZE
...@@ -24,5 +33,6 @@ ...@@ -24,5 +33,6 @@
#define STRINGLIB_CMP memcmp #define STRINGLIB_CMP memcmp
#define STRINGLIB_TOSTR PyObject_Str #define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_GROUPING _PyString_InsertThousandsGrouping #define STRINGLIB_GROUPING _PyString_InsertThousandsGrouping
#define STRINGLIB_GROUPING_LOCALE _PyString_InsertThousandsGroupingLocale
#endif /* !STRINGLIB_STRINGDEFS_H */ #endif /* !STRINGLIB_STRINGDEFS_H */
...@@ -37,6 +37,15 @@ ...@@ -37,6 +37,15 @@
* *
* Return value: the #gdouble value. * Return value: the #gdouble value.
**/ **/
/*
Use system strtod; since strtod is locale aware, we may
have to first fix the decimal separator.
Note that unlike _Py_dg_strtod, the system strtod may not always give
correctly rounded results.
*/
double double
PyOS_ascii_strtod(const char *nptr, char **endptr) PyOS_ascii_strtod(const char *nptr, char **endptr)
{ {
...@@ -187,6 +196,13 @@ PyOS_ascii_strtod(const char *nptr, char **endptr) ...@@ -187,6 +196,13 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
return val; return val;
} }
/* Parse nptr as a double by delegating to PyOS_ascii_strtod.  No end
   pointer is requested, so the caller cannot learn where parsing
   stopped. */
double
PyOS_ascii_atof(const char *nptr)
{
    char **unused_end = NULL;
    double parsed = PyOS_ascii_strtod(nptr, unused_end);

    return parsed;
}
/* Given a string that may have a decimal point in the current /* Given a string that may have a decimal point in the current
locale, change it back to a dot. Since the string cannot get locale, change it back to a dot. Since the string cannot get
longer, no need for a maximum buffer size parameter. */ longer, no need for a maximum buffer size parameter. */
...@@ -292,8 +308,9 @@ ensure_minumim_exponent_length(char* buffer, size_t buf_size) ...@@ -292,8 +308,9 @@ ensure_minumim_exponent_length(char* buffer, size_t buf_size)
} }
} }
/* Ensure that buffer has a decimal point in it. The decimal point /* Ensure that buffer has a decimal point in it. The decimal point will not
will not be in the current locale, it will always be '.' */ be in the current locale, it will always be '.'. Don't add a decimal if an
exponent is present. */
Py_LOCAL_INLINE(void) Py_LOCAL_INLINE(void)
ensure_decimal_point(char* buffer, size_t buf_size) ensure_decimal_point(char* buffer, size_t buf_size)
{ {
...@@ -322,7 +339,8 @@ ensure_decimal_point(char* buffer, size_t buf_size) ...@@ -322,7 +339,8 @@ ensure_decimal_point(char* buffer, size_t buf_size)
insert_count = 1; insert_count = 1;
} }
} }
else { else if (!(*p == 'e' || *p == 'E')) {
/* Don't add ".0" if we have an exponent. */
chars_to_insert = ".0"; chars_to_insert = ".0";
insert_count = 2; insert_count = 2;
} }
...@@ -341,37 +359,6 @@ ensure_decimal_point(char* buffer, size_t buf_size) ...@@ -341,37 +359,6 @@ ensure_decimal_point(char* buffer, size_t buf_size)
} }
} }
/* Add the locale specific grouping characters to buffer. Note
that any decimal point (if it's present) in buffer is already
locale-specific. Only the characters before the decimal point (or,
failing that, before an 'e'/'E' exponent marker) are grouped.
buffer must be NUL-terminated; buf_size is passed through to
_PyString_InsertThousandsGrouping. Return 0 on error, else 1. */
Py_LOCAL_INLINE(int)
add_thousands_grouping(char* buffer, size_t buf_size)
{
Py_ssize_t len = strlen(buffer);
struct lconv *locale_data = localeconv();
const char *decimal_point = locale_data->decimal_point;
/* Find the locale's decimal point string, if any. We're only
concerned about the characters to the left of the decimal when
adding grouping. */
char *p = strstr(buffer, decimal_point);
if (!p) {
/* No decimal point was found. */
/* If there is an exponent, grouping must stop in front of it. */
p = strpbrk(buffer, "eE");
if (!p)
/* No exponent and no decimal. Use the entire
string. */
p = buffer + len;
}
/* At this point, p points just past the right-most character we
want to format. We need to add the grouping string for the
characters between buffer and p. */
return _PyString_InsertThousandsGrouping(buffer, len, p-buffer,
buf_size, NULL, 1);
}
/* see FORMATBUFLEN in unicodeobject.c */ /* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120 #define FLOAT_FORMATBUFLEN 120
...@@ -386,9 +373,8 @@ add_thousands_grouping(char* buffer, size_t buf_size) ...@@ -386,9 +373,8 @@ add_thousands_grouping(char* buffer, size_t buf_size)
* Converts a #gdouble to a string, using the '.' as * Converts a #gdouble to a string, using the '.' as
* decimal point. To format the number you pass in * decimal point. To format the number you pass in
* a printf()-style format string. Allowed conversion * a printf()-style format string. Allowed conversion
* specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'n'. * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
* *
* 'n' is the same as 'g', except it uses the current locale.
* 'Z' is the same as 'g', except it always has a decimal and * 'Z' is the same as 'g', except it always has a decimal and
* at least one digit after the decimal. * at least one digit after the decimal.
* *
...@@ -403,11 +389,6 @@ PyOS_ascii_formatd(char *buffer, ...@@ -403,11 +389,6 @@ PyOS_ascii_formatd(char *buffer,
char format_char; char format_char;
size_t format_len = strlen(format); size_t format_len = strlen(format);
/* For type 'n', we need to make a copy of the format string, because
we're going to modify 'n' -> 'g', and format is const char*, so we
can't modify it directly. FLOAT_FORMATBUFLEN should be longer than
we ever need this to be. There's an upcoming check to ensure it's
big enough. */
/* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but /* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but
also with at least one character past the decimal. */ also with at least one character past the decimal. */
char tmp_format[FLOAT_FORMATBUFLEN]; char tmp_format[FLOAT_FORMATBUFLEN];
...@@ -433,12 +414,12 @@ PyOS_ascii_formatd(char *buffer, ...@@ -433,12 +414,12 @@ PyOS_ascii_formatd(char *buffer,
if (!(format_char == 'e' || format_char == 'E' || if (!(format_char == 'e' || format_char == 'E' ||
format_char == 'f' || format_char == 'F' || format_char == 'f' || format_char == 'F' ||
format_char == 'g' || format_char == 'G' || format_char == 'g' || format_char == 'G' ||
format_char == 'n' || format_char == 'Z')) format_char == 'Z'))
return NULL; return NULL;
/* Map 'n' or 'Z' format_char to 'g', by copying the format string and /* Map 'Z' format_char to 'g', by copying the format string and
replacing the final char with a 'g' */ replacing the final char with a 'g' */
if (format_char == 'n' || format_char == 'Z') { if (format_char == 'Z') {
if (format_len + 1 >= sizeof(tmp_format)) { if (format_len + 1 >= sizeof(tmp_format)) {
/* The format won't fit in our copy. Error out. In /* The format won't fit in our copy. Error out. In
practice, this will never happen and will be practice, this will never happen and will be
...@@ -457,10 +438,7 @@ PyOS_ascii_formatd(char *buffer, ...@@ -457,10 +438,7 @@ PyOS_ascii_formatd(char *buffer,
/* Do various fixups on the return string */ /* Do various fixups on the return string */
/* Get the current locale, and find the decimal point string. /* Get the current locale, and find the decimal point string.
Convert that string back to a dot. Do not do this if using the Convert that string back to a dot. */
'n' (number) format code, since we want to keep the localized
decimal point in that case. */
if (format_char != 'n')
change_decimal_from_locale_to_dot(buffer); change_decimal_from_locale_to_dot(buffer);
/* If an exponent exists, ensure that the exponent is at least /* If an exponent exists, ensure that the exponent is at least
...@@ -475,16 +453,111 @@ PyOS_ascii_formatd(char *buffer, ...@@ -475,16 +453,111 @@ PyOS_ascii_formatd(char *buffer,
if (format_char == 'Z') if (format_char == 'Z')
ensure_decimal_point(buffer, buf_size); ensure_decimal_point(buffer, buf_size);
/* If format_char is 'n', add the thousands grouping. */
if (format_char == 'n')
if (!add_thousands_grouping(buffer, buf_size))
return NULL;
return buffer; return buffer;
} }
/* Convert a double to a newly allocated, NUL-terminated string.

   val          the value to format.
   format_code  'e', 'f' or 'g'; 'E', 'F' or 'G' for the same conversions
                with the result upper-cased; 'r' (formats as 'g' with
                precision 17) or 's' (formats as 'g' with precision 12).
   precision    precision inserted into the printf-style format; must be
                0 for 'r' and 's', which supply their own.
   flags        zero or more Py_DTSF_* values or-ed together.
   type         if non-NULL, receives Py_DTST_FINITE, Py_DTST_INFINITE or
                Py_DTST_NAN on success.

   Returns a buffer that the caller must release with PyMem_Free.
   Returns NULL after calling PyErr_BadInternalCall (invalid arguments,
   or the underlying formatting step failed) or PyErr_NoMemory. */
PyAPI_FUNC(char *) PyOS_double_to_string(double val,
                                         char format_code,
                                         int precision,
                                         int flags,
                                         int *type)
{
    /* NOTE(review): buf is a fixed 128 bytes; presumably a very wide 'f'
       conversion is truncated by PyOS_ascii_formatd -- confirm. */
    char buf[128];
    char format[32];
    Py_ssize_t len;
    char *result;
    char *p;
    int t;
    int upper = 0;

    /* Validate format_code, and map upper and lower case */
    switch (format_code) {
    case 'e':          /* exponent */
    case 'f':          /* fixed */
    case 'g':          /* general */
        break;
    case 'E':
        upper = 1;
        format_code = 'e';
        break;
    case 'F':
        upper = 1;
        format_code = 'f';
        break;
    case 'G':
        upper = 1;
        format_code = 'g';
        break;
    case 'r':          /* repr format */
        /* Supplied precision is unused, must be 0. */
        if (precision != 0) {
            PyErr_BadInternalCall();
            return NULL;
        }
        precision = 17;
        format_code = 'g';
        break;
    case 's':          /* str format */
        /* Supplied precision is unused, must be 0. */
        if (precision != 0) {
            PyErr_BadInternalCall();
            return NULL;
        }
        precision = 12;
        format_code = 'g';
        break;
    default:
        PyErr_BadInternalCall();
        return NULL;
    }

    /* Handle nan and inf. */
    if (Py_IS_NAN(val)) {
        strcpy(buf, "nan");
        t = Py_DTST_NAN;
    } else if (Py_IS_INFINITY(val)) {
        if (copysign(1., val) == 1.)
            strcpy(buf, "inf");
        else
            strcpy(buf, "-inf");
        t = Py_DTST_INFINITE;
    } else {
        t = Py_DTST_FINITE;

        /* 'Z' is PyOS_ascii_formatd's code for 'g' that always keeps a
           decimal point with at least one digit after it. */
        if (flags & Py_DTSF_ADD_DOT_0)
            format_code = 'Z';

        PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
                      (flags & Py_DTSF_ALT ? "#" : ""), precision,
                      format_code);

        /* PyOS_ascii_formatd returns NULL when it rejects the format.
           buf would then be uninitialised, so it must not be read. */
        if (PyOS_ascii_formatd(buf, sizeof(buf), format, val) == NULL) {
            PyErr_BadInternalCall();
            return NULL;
        }
    }

    len = strlen(buf);

    /* Add 1 for the trailing 0 byte.
       Add 1 because we might need to make room for the sign.
       */
    result = PyMem_Malloc(len + 2);
    if (result == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    p = result;

    /* Never add sign for nan/inf, even if asked. */
    if ((flags & Py_DTSF_SIGN) && buf[0] != '-' && t == Py_DTST_FINITE)
        *p++ = '+';

    strcpy(p, buf);

    if (upper) {
        /* Convert to upper case.  toupper() is only defined for values
           representable as unsigned char (or EOF), so convert first in
           case plain char is signed. */
        char *p1;
        for (p1 = p; *p1; p1++)
            *p1 = (char)toupper((unsigned char)*p1);
    }

    if (type)
        *type = t;
    return result;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment