Kaydet (Commit) 778d5cc4 authored tarafından Raymond Hettinger's avatar Raymond Hettinger

Tweak the comments and formatting.

üst 8f66a4a3
...@@ -311,61 +311,36 @@ FUNC1(tanh, tanh, 0, ...@@ -311,61 +311,36 @@ FUNC1(tanh, tanh, 0,
<http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/393090>, <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/393090>,
enhanced with the exact partials sum and roundoff from Mark enhanced with the exact partials sum and roundoff from Mark
Dickinson's post at <http://bugs.python.org/file10357/msum4.py>. Dickinson's post at <http://bugs.python.org/file10357/msum4.py>.
See those links for more details, proofs and other references.
See both of those for more details, proofs and other references.
Note 1: IEEE 754R floating point semantics are assumed,
Note 1: IEEE 754 floating point format and semantics are assumed, but not but the current implementation does not re-establish special
explicitly maintained. The following rules may not apply: value semantics across iterations (i.e. handling -Inf + Inf).
1. if the summands include a NaN, return a NaN, Note 2: No provision is made for intermediate overflow handling;
therefore, sum([1e+308, 1e-308, 1e+308]) returns result 1e+308 while
2. if the summands include infinities of both signs, raise ValueError, sum([1e+308, 1e+308, 1e-308]) raises an OverflowError due to the
overflow of the first partial sum.
3. if the summands include infinities of only one sign, return infinity
with that sign, Note 3: Aggressively optimizing compilers can potentially eliminate the
residual values needed for accurate summation. For instance, the statements
4. otherwise (all summands are finite) if the result is infinite, raise "hi = x + y; lo = y - (hi - x);" could be mis-transformed to
OverflowError. The result can never be a NaN if all summands are "hi = x + y; lo = 0.0;" which defeats the computation of residuals.
finite.
Note 4: A similar implementation is in Modules/cmathmodule.c.
Note 2: the implementation below not include the intermediate overflow Be sure to update both when making changes.
handling from Mark Dickinson's msum(). Therefore, sum([1e+308, 1e-308,
1e+308]) returns result 1e+308, however sum([1e+308, 1e+308, 1e-308]) Note 5: The signature of math.sum() differs from __builtin__.sum()
raises an OverflowError due to intermediate overflow of the first because the start argument doesn't make sense in the context of
partial sum. accurate summation. Since the partials table is collapsed before
returning a result, sum(seq2, start=sum(seq1)) may not equal the
Note 3: aggressively optimizing compilers may eliminate the roundoff accurate result returned by sum(itertools.chain(seq1, seq2)).
expressions critical for accurate summation. For example, the compiler
may optimize the following expressions
hi = x + y;
lo = y - (hi - x);
to
hi = x + y;
lo = 0.0;
defeating the whole purpose. Using volatile variables and/or explicit
assignment of critical subexpressions to a volatile variable should
remedy the problem
volatile double v; // Deter compiler from algebraically optimizing
// this critical, intermediate value away
hi = x + y;
v = hi - x;
lo = y - v;
by forcing the compiler to compute the value for v. This may also help
when subexpression are not computed with the full double precision.
Note 4. the same summation functions may be in ./cmathmodule.c. Make
sure to update both when making changes.
*/ */
#define NUM_PARTIALS 32 /* initial partials array size, on stack */ #define NUM_PARTIALS 32 /* initial partials array size, on stack */
/* Extend the partials array p[] by doubling its size. /* Extend the partials array p[] by doubling its size. */
*/ static int /* non-zero on error */
static int /* non-zero on error */
_sum_realloc(double **p_ptr, Py_ssize_t n, _sum_realloc(double **p_ptr, Py_ssize_t n,
double *ps, Py_ssize_t *m_ptr) double *ps, Py_ssize_t *m_ptr)
{ {
...@@ -383,7 +358,7 @@ _sum_realloc(double **p_ptr, Py_ssize_t n, ...@@ -383,7 +358,7 @@ _sum_realloc(double **p_ptr, Py_ssize_t n,
else else
v = PyMem_Realloc(p, sizeof(double) * m); v = PyMem_Realloc(p, sizeof(double) * m);
} }
if (v == NULL) { /* size overflow or no memory */ if (v == NULL) { /* size overflow or no memory */
PyErr_SetString(PyExc_MemoryError, "math sum partials"); PyErr_SetString(PyExc_MemoryError, "math sum partials");
return 1; return 1;
} }
...@@ -419,8 +394,9 @@ _sum_realloc(double **p_ptr, Py_ssize_t n, ...@@ -419,8 +394,9 @@ _sum_realloc(double **p_ptr, Py_ssize_t n,
non-zero, non-special, non-overlapping and strictly increasing in non-zero, non-special, non-overlapping and strictly increasing in
magnitude, but possibly not all having the same sign. magnitude, but possibly not all having the same sign.
Depends on IEEE 754 arithmetic guarantees. Depends on IEEE 754 arithmetic guarantees and half-even rounding.
*/ */
static PyObject* static PyObject*
math_sum(PyObject *self, PyObject *seq) math_sum(PyObject *self, PyObject *seq)
{ {
...@@ -434,8 +410,7 @@ math_sum(PyObject *self, PyObject *seq) ...@@ -434,8 +410,7 @@ math_sum(PyObject *self, PyObject *seq)
PyFPE_START_PROTECT("sum", Py_DECREF(iter); return NULL) PyFPE_START_PROTECT("sum", Py_DECREF(iter); return NULL)
for(;;) { /* for x in iterable */ for(;;) { /* for x in iterable */
/* some invariants */
assert(0 <= n && n <= m); assert(0 <= n && n <= m);
assert((m == NUM_PARTIALS && p == ps) || assert((m == NUM_PARTIALS && p == ps) ||
(m > NUM_PARTIALS && p != NULL)); (m > NUM_PARTIALS && p != NULL));
...@@ -444,28 +419,27 @@ math_sum(PyObject *self, PyObject *seq) ...@@ -444,28 +419,27 @@ math_sum(PyObject *self, PyObject *seq)
if (item == NULL) { if (item == NULL) {
if (PyErr_Occurred()) if (PyErr_Occurred())
goto _sum_error; goto _sum_error;
else break;
break;
} }
x = PyFloat_AsDouble(item); x = PyFloat_AsDouble(item);
Py_DECREF(item); Py_DECREF(item);
if (PyErr_Occurred()) if (PyErr_Occurred())
goto _sum_error; goto _sum_error;
for (i = j = 0; j < n; j++) { /* for y in partials */ for (i = j = 0; j < n; j++) { /* for y in partials */
y = p[j]; y = p[j];
hi = x + y; hi = x + y;
lo = fabs(x) < fabs(y) lo = fabs(x) < fabs(y)
? x - (hi - y) /* volatile */ ? x - (hi - y)
: y - (hi - x); /* volatile */ : y - (hi - x);
if (lo != 0.0) if (lo != 0.0)
p[i++] = lo; p[i++] = lo;
x = hi; x = hi;
} }
/* ps[i:] = [x] */
n = i; n = i; /* ps[i:] = [x] */
if (x != 0.0) { if (x != 0.0) {
/* if non-finite, reset partials, effectively /* If non-finite, reset partials, effectively
adding subsequent items without roundoff adding subsequent items without roundoff
and yielding correct non-finite results, and yielding correct non-finite results,
provided IEEE 754 rules are observed */ provided IEEE 754 rules are observed */
...@@ -476,27 +450,27 @@ math_sum(PyObject *self, PyObject *seq) ...@@ -476,27 +450,27 @@ math_sum(PyObject *self, PyObject *seq)
p[n++] = x; p[n++] = x;
} }
} }
assert(n <= m);
if (n > 0) { if (n > 0) {
hi = p[--n]; hi = p[--n];
if (Py_IS_FINITE(hi)) { if (Py_IS_FINITE(hi)) {
/* sum_exact(ps, hi) from the top, stop /* sum_exact(ps, hi) from the top, stop when the sum becomes inexact. */
as soon as the sum becomes inexact */
while (n > 0) { while (n > 0) {
x = p[--n]; x = p[--n];
y = hi; y = hi;
hi = x + y; hi = x + y;
assert(fabs(x) < fabs(y)); assert(fabs(x) < fabs(y));
lo = x - (hi - y); /* volatile */ lo = x - (hi - y);
if (lo != 0.0) if (lo != 0.0)
break; break;
} }
/* round correctly if necessary */ /* Little dance to allow half-even rounding across multiple partials.
Needed so that sum([1e-16, 1, 1e16]) will round-up to two instead
of down to zero (the 1e16 makes the 1 slightly closer to two). */
if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) || if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) ||
(lo > 0.0 && p[n-1] > 0.0))) { (lo > 0.0 && p[n-1] > 0.0))) {
y = lo * 2.0; y = lo * 2.0;
x = hi + y; /* volatile */ x = hi + y;
if (y == (x - hi)) if (y == (x - hi))
hi = x; hi = x;
} }
...@@ -513,7 +487,6 @@ math_sum(PyObject *self, PyObject *seq) ...@@ -513,7 +487,6 @@ math_sum(PyObject *self, PyObject *seq)
_sum_error: _sum_error:
PyFPE_END_PROTECT(hi) PyFPE_END_PROTECT(hi)
Py_DECREF(iter); Py_DECREF(iter);
if (p != ps) if (p != ps)
PyMem_Free(p); PyMem_Free(p);
...@@ -523,11 +496,9 @@ _sum_error: ...@@ -523,11 +496,9 @@ _sum_error:
#undef NUM_PARTIALS #undef NUM_PARTIALS
PyDoc_STRVAR(math_sum_doc, PyDoc_STRVAR(math_sum_doc,
"sum(sequence)\n\n\ "sum(iterable)\n\n\
Return the full precision sum of a sequence of numbers.\n\ Return an accurate floating point sum of values in the iterable.\n\
When the sequence is empty, return zero.\n\n\ Assumes IEEE-754 floating point arithmetic.");
For accurate results, IEEE 754 floating point format\n\
and semantics and floating point radix 2 are required.");
static PyObject * static PyObject *
math_trunc(PyObject *self, PyObject *number) math_trunc(PyObject *self, PyObject *number)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment