Kaydet (Commit) a3c01ce6 authored tarafından Tim Peters's avatar Tim Peters

SF bug #488480: integer multiply to return -max_int-1.

int_mul():  new and vastly simpler overflow checking.  Whether it's
faster or slower will likely vary across platforms, favoring boxes
with fast floating point.  OTOH, we no longer have to worry about
people shipping broken LONG_BIT definitions <0.9 wink>.
üst d2a557e5
...@@ -71,6 +71,32 @@ if not -24 < -12: raise TestFailed, 'int op' ...@@ -71,6 +71,32 @@ if not -24 < -12: raise TestFailed, 'int op'
xsize, ysize, zsize = 238, 356, 4 xsize, ysize, zsize = 238, 356, 4
if not (xsize*ysize*zsize == zsize*xsize*ysize == 338912): if not (xsize*ysize*zsize == zsize*xsize*ysize == 338912):
raise TestFailed, 'int mul commutativity' raise TestFailed, 'int mul commutativity'
# And another.
m = -sys.maxint - 1
for divisor in 1, 2, 4, 8, 16, 32:
j = m / divisor
prod = divisor * j
if prod != m:
raise TestFailed, "%r * %r == %r != %r" % (divisor, j, prod, m)
if type(prod) is not int:
raise TestFailed, ("expected type(prod) to be int, not %r" %
type(prod))
# Check for expected * overflow to long.
for divisor in 1, 2, 4, 8, 16, 32:
j = m / divisor - 1
prod = divisor * j
if type(prod) is not long:
raise TestFailed, ("expected type(%r) to be long, not %r" %
(prod, type(prod)))
# Check for expected * overflow to long.
m = sys.maxint
for divisor in 1, 2, 4, 8, 16, 32:
j = m / divisor + 1
prod = divisor * j
if type(prod) is not long:
raise TestFailed, ("expected type(%r) to be long, not %r" %
(prod, type(prod)))
print '6.4.2 Long integers' print '6.4.2 Long integers'
if 12L + 24L != 36L: raise TestFailed, 'long op' if 12L + 24L != 36L: raise TestFailed, 'long op'
if 12L + (-24L) != -12L: raise TestFailed, 'long op' if 12L + (-24L) != -12L: raise TestFailed, 'long op'
......
...@@ -148,16 +148,16 @@ PyInt_AsLong(register PyObject *op) ...@@ -148,16 +148,16 @@ PyInt_AsLong(register PyObject *op)
PyNumberMethods *nb; PyNumberMethods *nb;
PyIntObject *io; PyIntObject *io;
long val; long val;
if (op && PyInt_Check(op)) if (op && PyInt_Check(op))
return PyInt_AS_LONG((PyIntObject*) op); return PyInt_AS_LONG((PyIntObject*) op);
if (op == NULL || (nb = op->ob_type->tp_as_number) == NULL || if (op == NULL || (nb = op->ob_type->tp_as_number) == NULL ||
nb->nb_int == NULL) { nb->nb_int == NULL) {
PyErr_SetString(PyExc_TypeError, "an integer is required"); PyErr_SetString(PyExc_TypeError, "an integer is required");
return -1; return -1;
} }
io = (PyIntObject*) (*nb->nb_int) (op); io = (PyIntObject*) (*nb->nb_int) (op);
if (io == NULL) if (io == NULL)
return -1; return -1;
...@@ -166,10 +166,10 @@ PyInt_AsLong(register PyObject *op) ...@@ -166,10 +166,10 @@ PyInt_AsLong(register PyObject *op)
"nb_int should return int object"); "nb_int should return int object");
return -1; return -1;
} }
val = PyInt_AS_LONG(io); val = PyInt_AS_LONG(io);
Py_DECREF(io); Py_DECREF(io);
return val; return val;
} }
...@@ -219,7 +219,7 @@ PyObject * ...@@ -219,7 +219,7 @@ PyObject *
PyInt_FromUnicode(Py_UNICODE *s, int length, int base) PyInt_FromUnicode(Py_UNICODE *s, int length, int base)
{ {
char buffer[256]; char buffer[256];
if (length >= sizeof(buffer)) { if (length >= sizeof(buffer)) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"int() literal too large to convert"); "int() literal too large to convert");
...@@ -312,39 +312,38 @@ int_sub(PyIntObject *v, PyIntObject *w) ...@@ -312,39 +312,38 @@ int_sub(PyIntObject *v, PyIntObject *w)
} }
/* /*
Integer overflow checking used to be done using a double, but on 64 Integer overflow checking for * is painful: Python tried a couple ways, but
bit machines (where both long and double are 64 bit) this fails they didn't work on all platforms, or failed in endcases (a product of
because the double doesn't have enough precision. John Tromp suggests -sys.maxint-1 has been a particular pain).
the following algorithm:
Here's another way:
Suppose again we normalize a and b to be nonnegative.
Let ah and al (bh and bl) be the high and low 32 bits of a (b, resp.). The native long product x*y is either exactly right or *way* off, being
Now we test ah and bh against zero and get essentially 3 possible outcomes. just the last n bits of the true product, where n is the number of bits
in a long (the delivered product is the true product plus i*2**n for
1) both ah and bh > 0 : then report overflow some integer i).
2) both ah and bh = 0 : then compute a*b and report overflow if it comes out The native double product (double)x * (double)y is subject to three
negative rounding errors: on a sizeof(long)==8 box, each cast to double can lose
info, and even on a sizeof(long)==4 box, the multiplication can lose info.
3) ah > 0 and bh = 0 : compute ah*bl and report overflow if it's >= 2^31 But, unlike the native long product, it's not in *range* trouble: even
compute al*bl and report overflow if it's negative if sizeof(long)==32 (256-bit longs), the product easily fits in the
add (ah*bl)<<32 to al*bl and report overflow if dynamic range of a double. So the leading 50 (or so) bits of the double
it's negative product are correct.
In case of no overflow the result is then negated if necessary. We check these two ways against each other, and declare victory if they're
approximately the same. Else, because the native long product is the only
The majority of cases will be 2), in which case this method is the same as one that can lose catastrophic amounts of information, it's the native long
what I suggested before. If multiplication is expensive enough, then the product that must have overflowed.
other method is faster on case 3), but also more work to program, so I
guess the above is the preferred solution.
*/ */
static PyObject * static PyObject *
int_mul(PyObject *v, PyObject *w) int_mul(PyObject *v, PyObject *w)
{ {
long a, b, ah, bh, x, y; long a, b;
int s = 1; long longprod; /* a*b in native long arithmetic */
double doubled_longprod; /* (double)longprod */
double doubleprod; /* (double)a * (double)b */
if (!PyInt_Check(v) && if (!PyInt_Check(v) &&
v->ob_type->tp_as_sequence && v->ob_type->tp_as_sequence &&
...@@ -363,97 +362,34 @@ int_mul(PyObject *v, PyObject *w) ...@@ -363,97 +362,34 @@ int_mul(PyObject *v, PyObject *w)
CONVERT_TO_LONG(v, a); CONVERT_TO_LONG(v, a);
CONVERT_TO_LONG(w, b); CONVERT_TO_LONG(w, b);
ah = a >> (LONG_BIT/2); longprod = a * b;
bh = b >> (LONG_BIT/2); doubleprod = (double)a * (double)b;
doubled_longprod = (double)longprod;
/* Quick test for common case: two small positive ints */
/* Fast path for normal case: small multiplicands, and no info
if (ah == 0 && bh == 0) { is lost in either method. */
x = a*b; if (doubled_longprod == doubleprod)
if (x < 0) return PyInt_FromLong(longprod);
goto bad;
return PyInt_FromLong(x); /* Somebody somewhere lost info. Close enough, or way off? Note
} that a != 0 and b != 0 (else doubled_longprod == doubleprod == 0).
The difference either is or isn't significant compared to the
/* Arrange that a >= b >= 0 */ true value (of which doubleprod is a good approximation).
*/
if (a < 0) { {
a = -a; const double diff = doubled_longprod - doubleprod;
if (a < 0) { const double absdiff = diff >= 0.0 ? diff : -diff;
/* Largest negative */ const double absprod = doubleprod >= 0.0 ? doubleprod :
if (b == 0 || b == 1) { -doubleprod;
x = a*b; /* absdiff/absprod <= 1/32 iff
goto ok; 32 * absdiff <= absprod -- 5 good bits is "close enough" */
} if (32.0 * absdiff <= absprod)
else return PyInt_FromLong(longprod);
goto bad; else if (err_ovf("integer multiplication"))
} return NULL;
s = -s; else
ah = a >> (LONG_BIT/2); return PyLong_Type.tp_as_number->nb_multiply(v, w);
}
if (b < 0) {
b = -b;
if (b < 0) {
/* Largest negative */
if (a == 0 || (a == 1 && s == 1)) {
x = a*b;
goto ok;
}
else
goto bad;
}
s = -s;
bh = b >> (LONG_BIT/2);
}
/* 1) both ah and bh > 0 : then report overflow */
if (ah != 0 && bh != 0)
goto bad;
/* 2) both ah and bh = 0 : then compute a*b and report
overflow if it comes out negative */
if (ah == 0 && bh == 0) {
x = a*b;
if (x < 0)
goto bad;
return PyInt_FromLong(x*s);
}
if (a < b) {
/* Swap */
x = a;
a = b;
b = x;
ah = bh;
/* bh not used beyond this point */
} }
/* 3) ah > 0 and bh = 0 : compute ah*bl and report overflow if
it's >= 2^31
compute al*bl and report overflow if it's negative
add (ah*bl)<<32 to al*bl and report overflow if
it's negative
(NB b == bl in this case, and we make a = al) */
y = ah*b;
if (y >= (1L << (LONG_BIT/2 - 1)))
goto bad;
a &= (1L << (LONG_BIT/2)) - 1;
x = a*b;
if (x < 0)
goto bad;
x += y << (LONG_BIT/2);
if (x < 0)
goto bad;
ok:
return PyInt_FromLong(x * s);
bad:
if (err_ovf("integer multiplication"))
return NULL;
return PyLong_Type.tp_as_number->nb_multiply(v, w);
} }
/* Return type of i_divmod */ /* Return type of i_divmod */
...@@ -468,7 +404,7 @@ i_divmod(register long x, register long y, ...@@ -468,7 +404,7 @@ i_divmod(register long x, register long y,
long *p_xdivy, long *p_xmody) long *p_xdivy, long *p_xmody)
{ {
long xdivy, xmody; long xdivy, xmody;
if (y == 0) { if (y == 0) {
PyErr_SetString(PyExc_ZeroDivisionError, PyErr_SetString(PyExc_ZeroDivisionError,
"integer division or modulo by zero"); "integer division or modulo by zero");
...@@ -623,7 +559,7 @@ int_pow(PyIntObject *v, PyIntObject *w, PyIntObject *z) ...@@ -623,7 +559,7 @@ int_pow(PyIntObject *v, PyIntObject *w, PyIntObject *z)
ix = 1; ix = 1;
while (iw > 0) { while (iw > 0) {
prev = ix; /* Save value for overflow check */ prev = ix; /* Save value for overflow check */
if (iw & 1) { if (iw & 1) {
ix = ix*temp; ix = ix*temp;
if (temp == 0) if (temp == 0)
break; /* Avoid ix / 0 */ break; /* Avoid ix / 0 */
...@@ -666,7 +602,7 @@ int_pow(PyIntObject *v, PyIntObject *w, PyIntObject *z) ...@@ -666,7 +602,7 @@ int_pow(PyIntObject *v, PyIntObject *w, PyIntObject *z)
} }
} }
return PyInt_FromLong(ix); return PyInt_FromLong(ix);
} }
static PyObject * static PyObject *
int_neg(PyIntObject *v) int_neg(PyIntObject *v)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment