Backport of some of the work in r71665 to trunk. This reworks much of int, long, and float __format__(), and it keeps their implementation in sync with py3k.

Also added PyOS_double_to_string. This is the "fallback" version that's also available in trunk, and should be kept in sync with that code. I'll add an issue to document PyOS_double_to_string in the C API.

There are many internal cleanups. Externally visible changes include:

- Implement PEP 378, Format Specifier for Thousands Separator, for floats, ints, and longs.
- Issue #5515: 'n' formatting for ints, longs, and floats handles leading zero formatting poorly.
- Issue #5772: For float.__format__, don't add a trailing ".0" if we're using no type code and we have an exponent.

Backport of some of the work in r71665 to trunk. This reworks much of int, long, and float __format__(), and it keeps their implementation in sync with py3k.

Also added PyOS_double_to_string. This is the "fallback" version that's also available in trunk, and should be kept in sync with that code. I'll add an issue to document PyOS_double_to_string in the C API.

There are many internal cleanups. Externally visible changes include:

- Implement PEP 378, Format Specifier for Thousands Separator, for floats, ints, and longs.
- Issue #5515: 'n' formatting for ints, longs, and floats handles leading zero formatting poorly.
- Issue #5772: For float.__format__, don't add a trailing ".0" if we're using no type code and we have an exponent.
aca19e6a · Eric Smith · cbb53087 · aca19e6a · aca19e6a · aca19e6a
Commit aca19e6a, authored Apr 22, 2009 by Eric Smith
9 changed files
--- a/Include/pystrtod.h
+++ b/Include/pystrtod.h
@@ -10,6 +10,25 @@ PyAPI_FUNC(double) PyOS_ascii_strtod(const char *str, char **ptr);
 PyAPI_FUNC(double) PyOS_ascii_atof(const char *str);
 PyAPI_FUNC(char *) PyOS_ascii_formatd(char *buffer, size_t buf_len,  const char *format, double d);

+/* The caller is responsible for calling PyMem_Free to free the buffer
+   that is returned. */
+PyAPI_FUNC(char *) PyOS_double_to_string(double val,
+                                         char format_code,
+                                         int precision,
+                                         int flags,
+                                         int *type);
+
+
+/* PyOS_double_to_string's "flags" parameter can be set to 0 or more of: */
+#define Py_DTSF_SIGN      0x01 /* always add the sign */
+#define Py_DTSF_ADD_DOT_0 0x02 /* if the result is an integer add ".0" */
+#define Py_DTSF_ALT       0x04 /* "alternate" formatting. it's format_code
+                                  specific */
+
+/* PyOS_double_to_string's "type", if non-NULL, will be set to one of: */
+#define Py_DTST_FINITE 0
+#define Py_DTST_INFINITE 1
+#define Py_DTST_NAN 2

 #ifdef __cplusplus
 }

--- a/Include/stringobject.h
+++ b/Include/stringobject.h
@@ -177,16 +177,26 @@ PyAPI_FUNC(int) PyString_AsStringAndSize(
 				   strings) */
    );

+
 /* Using the current locale, insert the thousands grouping
   into the string pointed to by buffer.  For the argument descriptions,
   see Objects/stringlib/localeutil.h */
+PyAPI_FUNC(Py_ssize_t) _PyString_InsertThousandsGroupingLocale(char *buffer,
+                                  Py_ssize_t n_buffer,
+                                  char *digits,
+                                  Py_ssize_t n_digits,
+                                  Py_ssize_t min_width);

-PyAPI_FUNC(int) _PyString_InsertThousandsGrouping(char *buffer,
+/* Using explicit passed-in values, insert the thousands grouping
+   into the string pointed to by buffer.  For the argument descriptions,
+   see Objects/stringlib/localeutil.h */
+PyAPI_FUNC(Py_ssize_t) _PyString_InsertThousandsGrouping(char *buffer,
                                  Py_ssize_t n_buffer,
+                                  char *digits,
                                  Py_ssize_t n_digits,
-						  Py_ssize_t buf_size,
-						  Py_ssize_t *count,
-						  int append_zero_char);
+                                  Py_ssize_t min_width,
+                                  const char *grouping,
+                                  const char *thousands_sep);

 /* Format the object based on the format_spec, as defined in PEP 3101
   (Advanced String Formatting). */

--- a/Lib/test/test_format.py
+++ b/Lib/test/test_format.py
@@ -232,6 +232,10 @@ class FormatTest(unittest.TestCase):
        testboth("%o", -042L, "-42")
        testboth("%o", float(042), "42")

+        # alternate float formatting
+        testformat('%g', 1.1, '1.1')
+        testformat('%#g', 1.1, '1.10000')
+
        # Test exception for unknown format characters
        if verbose:
            print 'Testing exceptions'

--- a/Lib/test/test_types.py
+++ b/Lib/test/test_types.py
@@ -113,6 +113,9 @@ class TypesTests(unittest.TestCase):
        self.assertEqual(1.5e-101.__format__('e'), '1.500000e-101')
        self.assertEqual('%e' % 1.5e-101, '1.500000e-101')

+        self.assertEqual('%g' % 1.0, '1')
+        self.assertEqual('%#g' % 1.0, '1.00000')
+
    def test_normal_integers(self):
        # Ensure the first 256 integers are shared
        a = 256
@@ -412,6 +415,9 @@ class TypesTests(unittest.TestCase):
        self.assertRaises(TypeError, 3 .__format__, None)
        self.assertRaises(TypeError, 3 .__format__, 0)

+        # can't have ',' with 'c'
+        self.assertRaises(ValueError, 3 .__format__, ",c")
+
        # ensure that only int and float type specifiers work
        for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] +
                            [chr(x) for x in range(ord('A'), ord('Z')+1)]):
@@ -609,10 +615,36 @@ class TypesTests(unittest.TestCase):
        # a totally empty format specifier means something else.
        # So, just use a sign flag
        test(1e200, '+g', '+1e+200')
-        test(1e200, '+', '+1.0e+200')
+        test(1e200, '+', '+1e+200')
+        test(1.1e200, '+g', '+1.1e+200')
+        test(1.1e200, '+', '+1.1e+200')
+
        test(1.1e200, '+g', '+1.1e+200')
        test(1.1e200, '+', '+1.1e+200')

+        # 0 padding
+        test(1234., '010f', '1234.000000')
+        test(1234., '011f', '1234.000000')
+        test(1234., '012f', '01234.000000')
+        test(-1234., '011f', '-1234.000000')
+        test(-1234., '012f', '-1234.000000')
+        test(-1234., '013f', '-01234.000000')
+        test(-1234.12341234, '013f', '-01234.123412')
+        test(-123456.12341234, '011.2f', '-0123456.12')
+
+        # 0 padding with commas
+        test(1234., '011,f', '1,234.000000')
+        test(1234., '012,f', '1,234.000000')
+        test(1234., '013,f', '01,234.000000')
+        test(-1234., '012,f', '-1,234.000000')
+        test(-1234., '013,f', '-1,234.000000')
+        test(-1234., '014,f', '-01,234.000000')
+        test(-12345., '015,f', '-012,345.000000')
+        test(-123456., '016,f', '-0,123,456.000000')
+        test(-123456., '017,f', '-0,123,456.000000')
+        test(-123456.12341234, '017,f', '-0,123,456.123412')
+        test(-123456.12341234, '013,.2f', '-0,123,456.12')
+
         # % formatting
        test(-1.0, '%', '-100.000000%')

@@ -637,6 +669,24 @@ class TypesTests(unittest.TestCase):
        self.assertRaises(ValueError, format, 0.0, '#')
        self.assertRaises(ValueError, format, 0.0, '#20f')

+    def test_format_spec_errors(self):
+        # int, float, and string all share the same format spec
+        # mini-language parser.
+
+        # Check that we can't ask for too many digits. This is
+        # probably a CPython specific test. It tries to put the width
+        # into a C long.
+        self.assertRaises(ValueError, format, 0, '1'*10000 + 'd')
+
+        # Similar with the precision.
+        self.assertRaises(ValueError, format, 0, '.' + '1'*10000 + 'd')
+
+        # And may as well test both.
+        self.assertRaises(ValueError, format, 0, '1'*1000 + '.' + '1'*10000 + 'd')
+
+        # Make sure commas aren't allowed with various type codes
+        for code in 'xXobns':
+            self.assertRaises(ValueError, format, 0, ',' + code)

 def test_main():
    run_unittest(TypesTests)

--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,15 @@ What's New in Python 2.7 alpha 1
 Core and Builtins
 -----------------

+- Implement PEP 378, Format Specifier for Thousands Separator, for
+  floats, ints, and longs.
+
+- Issue #5515: 'n' formatting for ints, longs, and floats handles
+  leading zero formatting poorly.
+
+- Issue #5772: For float.__format__, don't add a trailing ".0" if
+  we're using no type code and we have an exponent.
+
 - Issue #3166: Make long -> float (and int -> float) conversions
  correctly rounded.


--- a/Objects/stringlib/formatter.h
+++ b/Objects/stringlib/formatter.h
--- a/Objects/stringlib/localeutil.h
+++ b/Objects/stringlib/localeutil.h
--- a/Objects/stringlib/stringdefs.h
+++ b/Objects/stringlib/stringdefs.h
@@ -6,6 +6,15 @@
   compiled as unicode. */
 #define STRINGLIB_IS_UNICODE     0

+/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
+/* This needs to be cleaned up. See issue 5793. */
+#ifndef _tolower
+#define _tolower tolower
+#endif
+#ifndef _toupper
+#define _toupper toupper
+#endif
+
 #define STRINGLIB_OBJECT         PyStringObject
 #define STRINGLIB_CHAR           char
 #define STRINGLIB_TYPE_NAME      "string"
@@ -13,8 +22,8 @@
 #define STRINGLIB_EMPTY          nullstring
 #define STRINGLIB_ISDECIMAL(x)   ((x >= '0') && (x <= '9'))
 #define STRINGLIB_TODECIMAL(x)   (STRINGLIB_ISDECIMAL(x) ? (x - '0') : -1)
-#define STRINGLIB_TOUPPER        toupper
-#define STRINGLIB_TOLOWER        tolower
+#define STRINGLIB_TOUPPER(x)     _toupper(Py_CHARMASK(x))
+#define STRINGLIB_TOLOWER(x)     _tolower(Py_CHARMASK(x))
 #define STRINGLIB_FILL           memset
 #define STRINGLIB_STR            PyString_AS_STRING
 #define STRINGLIB_LEN            PyString_GET_SIZE
@@ -24,5 +33,6 @@
 #define STRINGLIB_CMP            memcmp
 #define STRINGLIB_TOSTR          PyObject_Str
 #define STRINGLIB_GROUPING       _PyString_InsertThousandsGrouping
+#define STRINGLIB_GROUPING_LOCALE _PyString_InsertThousandsGroupingLocale

 #endif /* !STRINGLIB_STRINGDEFS_H */
--- a/Python/pystrtod.c
+++ b/Python/pystrtod.c
@@ -37,6 +37,15 @@
 *
 * Return value: the #gdouble value.
 **/
+
+/*
+   Use system strtod;  since strtod is locale aware, we may
+   have to first fix the decimal separator.
+
+   Note that unlike _Py_dg_strtod, the system strtod may not always give
+   correctly rounded results.
+*/
+
 double
 PyOS_ascii_strtod(const char *nptr, char **endptr)
 {
@@ -187,6 +196,13 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
 	return val;
 }

+double
+PyOS_ascii_atof(const char *nptr)
+{
+	return PyOS_ascii_strtod(nptr, NULL);
+}
+
+
 /* Given a string that may have a decimal point in the current
   locale, change it back to a dot.  Since the string cannot get
   longer, no need for a maximum buffer size parameter. */
@@ -292,8 +308,9 @@ ensure_minumim_exponent_length(char* buffer, size_t buf_size)
 	}
 }

-/* Ensure that buffer has a decimal point in it.  The decimal point
-   will not be in the current locale, it will always be '.' */
+/* Ensure that buffer has a decimal point in it.  The decimal point will not
+   be in the current locale, it will always be '.'. Don't add a decimal if an
+   exponent is present. */
 Py_LOCAL_INLINE(void)
 ensure_decimal_point(char* buffer, size_t buf_size)
 {
@@ -322,7 +339,8 @@ ensure_decimal_point(char* buffer, size_t buf_size)
 			insert_count = 1;
 		}
 	}
-	else {
+	else if (!(*p == 'e' || *p == 'E')) {
+		/* Don't add ".0" if we have an exponent. */
 		chars_to_insert = ".0";
 		insert_count = 2;
 	}
@@ -341,37 +359,6 @@ ensure_decimal_point(char* buffer, size_t buf_size)
 	}
 }

-/* Add the locale specific grouping characters to buffer.  Note
-   that any decimal point (if it's present) in buffer is already
-   locale-specific.  Return 0 on error, else 1. */
-Py_LOCAL_INLINE(int)
-add_thousands_grouping(char* buffer, size_t buf_size)
-{
-	Py_ssize_t len = strlen(buffer);
-	struct lconv *locale_data = localeconv();
-	const char *decimal_point = locale_data->decimal_point;
-
-	/* Find the decimal point, if any.  We're only concerned
-	   about the characters to the left of the decimal when
-	   adding grouping. */
-	char *p = strstr(buffer, decimal_point);
-	if (!p) {
-		/* No decimal, use the entire string. */
-
-		/* If any exponent, adjust p. */
-		p = strpbrk(buffer, "eE");
-		if (!p)
-			/* No exponent and no decimal.  Use the entire
-			   string. */
-			p = buffer + len;
-	}
-	/* At this point, p points just past the right-most character we
-	   want to format.  We need to add the grouping string for the
-	   characters between buffer and p. */
-	return _PyString_InsertThousandsGrouping(buffer, len, p-buffer,
-						 buf_size, NULL, 1);
-}
-
 /* see FORMATBUFLEN in unicodeobject.c */
 #define FLOAT_FORMATBUFLEN 120

@@ -386,9 +373,8 @@ add_thousands_grouping(char* buffer, size_t buf_size)
 * Converts a #gdouble to a string, using the '.' as
 * decimal point. To format the number you pass in
 * a printf()-style format string. Allowed conversion
- * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'n'.
+ * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
 * 
- * 'n' is the same as 'g', except it uses the current locale.
 * 'Z' is the same as 'g', except it always has a decimal and
 *     at least one digit after the decimal.
 *
@@ -403,11 +389,6 @@ PyOS_ascii_formatd(char       *buffer,
 	char format_char;
 	size_t format_len = strlen(format);

-	/* For type 'n', we need to make a copy of the format string, because
-	   we're going to modify 'n' -> 'g', and format is const char*, so we
-	   can't modify it directly.  FLOAT_FORMATBUFLEN should be longer than
-	   we ever need this to be.  There's an upcoming check to ensure it's
-	   big enough. */
 	/* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
 	   also with at least one character past the decimal. */
 	char tmp_format[FLOAT_FORMATBUFLEN];
@@ -433,12 +414,12 @@ PyOS_ascii_formatd(char       *buffer,
 	if (!(format_char == 'e' || format_char == 'E' || 
 	      format_char == 'f' || format_char == 'F' || 
 	      format_char == 'g' || format_char == 'G' ||
-	      format_char == 'n' || format_char == 'Z'))
+	      format_char == 'Z'))
 		return NULL;

-	/* Map 'n' or 'Z' format_char to 'g', by copying the format string and
+	/* Map 'Z' format_char to 'g', by copying the format string and
 	   replacing the final char with a 'g' */
-	if (format_char == 'n' || format_char == 'Z') {
+	if (format_char == 'Z') {
 		if (format_len + 1 >= sizeof(tmp_format)) {
 			/* The format won't fit in our copy.  Error out.  In
 			   practice, this will never happen and will be
@@ -457,10 +438,7 @@ PyOS_ascii_formatd(char       *buffer,
 	/* Do various fixups on the return string */

 	/* Get the current locale, and find the decimal point string.
-	   Convert that string back to a dot.  Do not do this if using the
-	   'n' (number) format code, since we want to keep the localized
-	   decimal point in that case. */
-	if (format_char != 'n')
+	   Convert that string back to a dot. */
 	change_decimal_from_locale_to_dot(buffer);

 	/* If an exponent exists, ensure that the exponent is at least
@@ -475,16 +453,111 @@ PyOS_ascii_formatd(char       *buffer,
 	if (format_char == 'Z')
 		ensure_decimal_point(buffer, buf_size);

-	/* If format_char is 'n', add the thousands grouping. */
-	if (format_char == 'n')
-		if (!add_thousands_grouping(buffer, buf_size))
-			return NULL;
-
 	return buffer;
 }

-double
-PyOS_ascii_atof(const char *nptr)
+PyAPI_FUNC(char *) PyOS_double_to_string(double val,
+                                         char format_code,
+                                         int precision,
+                                         int flags,
+                                         int *type)
 {
-	return PyOS_ascii_strtod(nptr, NULL);
+	char buf[128];
+	char format[32];
+	Py_ssize_t len;
+	char *result;
+	char *p;
+	int t;
+	int upper = 0;
+
+	/* Validate format_code, and map upper and lower case */
+	switch (format_code) {
+	case 'e':          /* exponent */
+	case 'f':          /* fixed */
+	case 'g':          /* general */
+		break;
+	case 'E':
+		upper = 1;
+		format_code = 'e';
+		break;
+	case 'F':
+		upper = 1;
+		format_code = 'f';
+		break;
+	case 'G':
+		upper = 1;
+		format_code = 'g';
+		break;
+	case 'r':          /* repr format */
+		/* Supplied precision is unused, must be 0. */
+		if (precision != 0) {
+			PyErr_BadInternalCall();
+			return NULL;
+		}
+		precision = 17;
+		format_code = 'g';
+		break;
+	case 's':          /* str format */
+		/* Supplied precision is unused, must be 0. */
+		if (precision != 0) {
+			PyErr_BadInternalCall();
+			return NULL;
+		}
+		precision = 12;
+		format_code = 'g';
+		break;
+	default:
+		PyErr_BadInternalCall();
+		return NULL;
+	}
+
+	/* Handle nan and inf. */
+	if (Py_IS_NAN(val)) {
+		strcpy(buf, "nan");
+		t = Py_DTST_NAN;
+	} else if (Py_IS_INFINITY(val)) {
+		if (copysign(1., val) == 1.)
+			strcpy(buf, "inf");
+		else
+			strcpy(buf, "-inf");
+		t = Py_DTST_INFINITE;
+	} else {
+		t = Py_DTST_FINITE;
+
+
+		if (flags & Py_DTSF_ADD_DOT_0)
+			format_code = 'Z';
+
+		PyOS_snprintf(format, 32, "%%%s.%i%c", (flags & Py_DTSF_ALT ? "#" : ""), precision, format_code);
+		PyOS_ascii_formatd(buf, sizeof(buf), format, val);
+	}
+
+	len = strlen(buf);
+
+	/* Add 1 for the trailing 0 byte.
+	   Add 1 because we might need to make room for the sign.
+	   */
+	result = PyMem_Malloc(len + 2);
+	if (result == NULL) {
+		PyErr_NoMemory();
+		return NULL;
+	}
+	p = result;
+
+	/* Never add sign for nan/inf, even if asked. */
+	if (flags & Py_DTSF_SIGN && buf[0] != '-' && t == Py_DTST_FINITE)
+		*p++ = '+';
+
+	strcpy(p, buf);
+
+	if (upper) {
+		/* Convert to upper case. */
+		char *p1;
+		for (p1 = p; *p1; p1++)
+			*p1 = toupper(*p1);
+	}
+
+	if (type)
+		*type = t;
+	return result;
 }