Commit 7a3bfc3a authored by Tim Peters

Added q/Q standard (x-platform 8-byte ints) mode in struct module.

This completes the q/Q project.

longobject.c _PyLong_AsByteArray:  The original code had a gross bug:
the most-significant Python digit doesn't necessarily have SHIFT
significant bits, and you really need to count how many copies of the sign
bit it has else spurious overflow errors result.

test_struct.py:  This now does exhaustive std q/Q testing at, and on both
sides of, all relevant power-of-2 boundaries, both positive and negative.

NEWS:  Added brief dict news while I was at it.
parent ac4797a1
...@@ -72,7 +72,8 @@ Notes: ...@@ -72,7 +72,8 @@ Notes:
\item[(1)] \item[(1)]
The \character{q} and \character{Q} conversion codes are available in The \character{q} and \character{Q} conversion codes are available in
native mode only if the platform C compiler supports C \ctype{long long}, native mode only if the platform C compiler supports C \ctype{long long},
or, on Windows, \ctype{__int64}. or, on Windows, \ctype{__int64}. They're always available in standard
modes.
\end{description} \end{description}
...@@ -100,8 +101,8 @@ passed in to \function{pack()} is too long, the stored representation ...@@ -100,8 +101,8 @@ passed in to \function{pack()} is too long, the stored representation
is truncated. If the string is too short, padding is used to ensure is truncated. If the string is too short, padding is used to ensure
that exactly enough bytes are used to satisfy the count. that exactly enough bytes are used to satisfy the count.
For the \character{I} and \character{L} format characters, the return For the \character{I}, \character{L}, \character{q} and \character{Q}
value is a Python long integer. format characters, the return value is a Python long integer.
For the \character{P} format character, the return value is a Python For the \character{P} format character, the return value is a Python
integer or long integer, depending on the size needed to hold a integer or long integer, depending on the size needed to hold a
...@@ -139,10 +140,12 @@ Native size and alignment are determined using the C compiler's ...@@ -139,10 +140,12 @@ Native size and alignment are determined using the C compiler's
order. order.
Standard size and alignment are as follows: no alignment is required Standard size and alignment are as follows: no alignment is required
for any type (so you have to use pad bytes); \ctype{short} is 2 bytes; for any type (so you have to use pad bytes);
\ctype{int} and \ctype{long} are 4 bytes. \ctype{float} and \ctype{short} is 2 bytes;
\ctype{double} are 32-bit and 64-bit IEEE floating point numbers, \ctype{int} and \ctype{long} are 4 bytes;
respectively. \ctype{long long} (\ctype{__int64} on Windows) is 8 bytes;
\ctype{float} and \ctype{double} are 32-bit and 64-bit
IEEE floating point numbers, respectively.
Note the difference between \character{@} and \character{=}: both use Note the difference between \character{@} and \character{=}: both use
native byte order, but the size and alignment of the latter is native byte order, but the size and alignment of the latter is
......
...@@ -12,6 +12,16 @@ def simple_err(func, *args): ...@@ -12,6 +12,16 @@ def simple_err(func, *args):
func.__name__, args) func.__name__, args)
## pdb.set_trace() ## pdb.set_trace()
def any_err(func, *args):
    """Check that func(*args) raises struct.error, OverflowError or TypeError.

    Any of those three exceptions counts as the expected failure and is
    swallowed.  If the call returns normally instead, TestFailed is raised
    with a message naming the function and the arguments it was given.
    """
    try:
        func(*args)
    except (struct.error, OverflowError, TypeError):
        pass
    else:
        raise TestFailed("%s%s did not raise error" % (
            func.__name__, args))
##      pdb.set_trace()
simple_err(struct.calcsize, 'Z') simple_err(struct.calcsize, 'Z')
sz = struct.calcsize('i') sz = struct.calcsize('i')
...@@ -113,7 +123,8 @@ for fmt, arg, big, lil, asy in tests: ...@@ -113,7 +123,8 @@ for fmt, arg, big, lil, asy in tests:
raise TestFailed, "unpack(%s, %s) -> (%s,) # expected (%s,)" % ( raise TestFailed, "unpack(%s, %s) -> (%s,) # expected (%s,)" % (
`fmt`, `res`, `rev`, `arg`) `fmt`, `res`, `rev`, `arg`)
# Some q/Q sanity checks. ###########################################################################
# q/Q tests.
has_native_qQ = 1 has_native_qQ = 1
try: try:
...@@ -124,18 +135,22 @@ except struct.error: ...@@ -124,18 +135,22 @@ except struct.error:
if verbose: if verbose:
print "Platform has native q/Q?", has_native_qQ and "Yes." or "No." print "Platform has native q/Q?", has_native_qQ and "Yes." or "No."
simple_err(struct.pack, "Q", -1) # can't pack -1 as unsigned regardless any_err(struct.pack, "Q", -1) # can't pack -1 as unsigned regardless
simple_err(struct.pack, "q", "a") # can't pack string as 'q' regardless simple_err(struct.pack, "q", "a") # can't pack string as 'q' regardless
simple_err(struct.pack, "Q", "a") # ditto, but 'Q' simple_err(struct.pack, "Q", "a") # ditto, but 'Q'
def string_reverse(s):
    """Return a new string holding the characters of s in reverse order."""
    chars = list(s)
    chars.reverse()
    return "".join(chars)
def bigendian_to_native(value): def bigendian_to_native(value):
if isbigendian: if isbigendian:
return value return value
chars = list(value) else:
chars.reverse() return string_reverse(value)
return "".join(chars)
if has_native_qQ: def test_native_qQ():
bytes = struct.calcsize('q') bytes = struct.calcsize('q')
# The expected values here are in big-endian format, primarily because # The expected values here are in big-endian format, primarily because
# I'm on a little-endian machine and so this is the clearest way (for # I'm on a little-endian machine and so this is the clearest way (for
...@@ -156,3 +171,147 @@ if has_native_qQ: ...@@ -156,3 +171,147 @@ if has_native_qQ:
verify(retrieved == input, verify(retrieved == input,
"%r-unpack of %r gave %r, not %r" % "%r-unpack of %r gave %r, not %r" %
(format, got, retrieved, input)) (format, got, retrieved, input))
# Run the native-mode q/Q checks only when has_native_qQ is true.
if has_native_qQ:
test_native_qQ()
# Standard q/Q (8 bytes; should work on all platforms).
# Inclusive value ranges used by the standard-mode tests:
# 'Q' covers 0 .. 2**64 - 1 and 'q' covers -2**63 .. 2**63 - 1.
MIN_Q, MAX_Q = 0, 2L**64 - 1
MIN_q, MAX_q = -(2L**63), 2L**63 - 1
import binascii
# Check standard-mode (8-byte) 'q' and 'Q' handling of a single integer x.
#
# For each of the two codes whose range (MIN_q..MAX_q, MIN_Q..MAX_Q)
# contains x, the expected big-endian byte string is built without using
# struct (via hex() and unhexlify) and compared with what pack() produces;
# unpack() of that result must give x back, and unpacking a 9-byte string
# must raise.  The little-endian expectation is the big-endian one with its
# bytes reversed.  When x lies outside a code's range, pack() must raise.
#
# pack, unpack and unhexlify are bound as default arguments; callers pass
# only x.
def test_one_qQ(x, pack=struct.pack,
unpack=struct.unpack,
unhexlify=binascii.unhexlify):
if verbose:
print "trying std q/Q on", x, "==", hex(x)
# Try 'q'.
if MIN_q <= x <= MAX_q:
# Try '>q'.
expected = long(x)
if x < 0:
# Map a negative x onto its unsigned 64-bit two's-complement pattern.
expected += 1L << 64
assert expected > 0
expected = hex(expected)[2:-1] # chop "0x" and trailing 'L'
# unhexlify needs an even number of hex digits.
if len(expected) & 1:
expected = "0" + expected
expected = unhexlify(expected)
# Left-pad with zero bytes to the full 8-byte width.
expected = "\x00" * (8 - len(expected)) + expected
# >q pack work?
got = pack(">q", x)
verify(got == expected,
"'>q'-pack of %r gave %r, not %r" %
(x, got, expected))
# >q unpack work?
retrieved = unpack(">q", got)[0]
verify(x == retrieved,
"'>q'-unpack of %r gave %r, not %r" %
(got, retrieved, x))
# Adding any byte should cause a "too big" error.
any_err(unpack, ">q", '\x01' + got)
# Try '<q'.
# Little-endian is the same 8 bytes in the opposite order.
expected = string_reverse(expected)
# <q pack work?
got = pack("<q", x)
verify(got == expected,
"'<q'-pack of %r gave %r, not %r" %
(x, got, expected))
# <q unpack work?
retrieved = unpack("<q", got)[0]
verify(x == retrieved,
"'<q'-unpack of %r gave %r, not %r" %
(got, retrieved, x))
# Adding any byte should cause a "too big" error.
any_err(unpack, "<q", '\x01' + got)
else:
# x is out of q's range -- verify pack realizes that.
any_err(pack, '>q', x)
any_err(pack, '<q', x)
# Much the same for 'Q'.
if MIN_Q <= x <= MAX_Q:
# Try '>Q'.
# x is non-negative here (MIN_Q is 0), so no two's-complement step.
expected = long(x)
expected = hex(expected)[2:-1] # chop "0x" and trailing 'L'
# unhexlify needs an even number of hex digits.
if len(expected) & 1:
expected = "0" + expected
expected = unhexlify(expected)
# Left-pad with zero bytes to the full 8-byte width.
expected = "\x00" * (8 - len(expected)) + expected
# >Q pack work?
got = pack(">Q", x)
verify(got == expected,
"'>Q'-pack of %r gave %r, not %r" %
(x, got, expected))
# >Q unpack work?
retrieved = unpack(">Q", got)[0]
verify(x == retrieved,
"'>Q'-unpack of %r gave %r, not %r" %
(got, retrieved, x))
# Adding any byte should cause a "too big" error.
any_err(unpack, ">Q", '\x01' + got)
# Try '<Q'.
# Little-endian is the same 8 bytes in the opposite order.
expected = string_reverse(expected)
# <Q pack work?
got = pack("<Q", x)
verify(got == expected,
"'<Q'-pack of %r gave %r, not %r" %
(x, got, expected))
# <Q unpack work?
retrieved = unpack("<Q", got)[0]
verify(x == retrieved,
"'<Q'-unpack of %r gave %r, not %r" %
(got, retrieved, x))
# Adding any byte should cause a "too big" error.
any_err(unpack, "<Q", '\x01' + got)
else:
# x is out of Q's range -- verify pack realizes that.
any_err(pack, '>Q', x)
any_err(pack, '<Q', x)
# Drive test_one_qQ over a set of boundary and random values: every power
# of two 2**0 .. 2**69, fifty random 64-bit values, the negation of each,
# and each of those plus -1, 0 and +1.
def test_std_qQ():
from random import randrange
# Create all interesting powers of 2.
values = []
for exp in range(70):
values.append(1L << exp)
# Add some random 64-bit values.
for i in range(50):
val = 0L
# Assemble the value one random byte at a time, 8 bytes in all.
for j in range(8):
val = (val << 8) | randrange(256)
values.append(val)
# Try all those, and their negations, and +-1 from them. Note
# that this tests all power-of-2 boundaries in range, and a few out
# of range, plus +-(2**n +- 1).
for base in values:
for val in -base, base:
for incr in -1, 0, 1:
x = val + incr
# Convert to a plain int when int() accepts the value; on
# OverflowError x is left as it was.  Presumably this is so both
# int and long arguments get exercised -- confirm with the author.
try:
x = int(x)
except OverflowError:
pass
test_one_qQ(x)
test_std_qQ()
...@@ -84,6 +84,9 @@ Core ...@@ -84,6 +84,9 @@ Core
sortdict(dict) function for a simple way to display a dict in sorted sortdict(dict) function for a simple way to display a dict in sorted
order. order.
- Many other small changes to dicts were made, resulting in faster
operation along the most common code paths.
- Dictionary objects now support the "in" operator: "x in dict" means - Dictionary objects now support the "in" operator: "x in dict" means
the same as dict.has_key(x). the same as dict.has_key(x).
...@@ -119,7 +122,7 @@ Core ...@@ -119,7 +122,7 @@ Core
- Collisions in dicts are resolved via a new approach, which can help - Collisions in dicts are resolved via a new approach, which can help
dramatically in bad cases. For example, looking up every key in a dict dramatically in bad cases. For example, looking up every key in a dict
d with d.keys() = [i << 16 for i in range(20000)] is approximately 500x d with d.keys() == [i << 16 for i in range(20000)] is approximately 500x
faster now. Thanks to Christian Tismer for pointing out the cause and faster now. Thanks to Christian Tismer for pointing out the cause and
the nature of an effective cure (last December! better late than never). the nature of an effective cure (last December! better late than never).
...@@ -145,8 +148,8 @@ Library ...@@ -145,8 +148,8 @@ Library
native mode, these can be used only when the platform C compiler supports native mode, these can be used only when the platform C compiler supports
these types (when HAVE_LONG_LONG is #define'd by the Python config these types (when HAVE_LONG_LONG is #define'd by the Python config
process), and then they inherit the sizes and alignments of the C types. process), and then they inherit the sizes and alignments of the C types.
XXX TODO In standard mode, 'q' and 'Q' are supported on all platforms, and In standard mode, 'q' and 'Q' are supported on all platforms, and are
XXX TODO are 8-byte integral types. 8-byte integral types.
Tests Tests
......
...@@ -80,6 +80,34 @@ typedef struct { char c; LONG_LONG x; } s_long_long; ...@@ -80,6 +80,34 @@ typedef struct { char c; LONG_LONG x; } s_long_long;
#pragma options align=reset #pragma options align=reset
#endif #endif
/* Return a new reference to a Python long holding the value of v, or NULL
   on failure.  Ints are converted with PyLong_FromLong, longs are returned
   as-is (with an extra reference), and any other object is converted
   through its nb_long slot when it has one.  If there is no such slot, or
   the slot returns something that is not a long, StructError is set.
   The caller owns the returned reference and must decref it. */
static PyObject *
get_pylong(PyObject *v)
{
    PyNumberMethods *as_number;
    PyObject *converted;

    assert(v != NULL);

    if (PyInt_Check(v))
        return PyLong_FromLong(PyInt_AS_LONG(v));

    if (PyLong_Check(v)) {
        Py_INCREF(v);
        return v;
    }

    as_number = v->ob_type->tp_as_number;
    if (as_number == NULL || as_number->nb_long == NULL)
        goto cannot_convert;

    converted = as_number->nb_long(v);
    if (converted == NULL)
        /* Propagate the failure without replacing whatever error the
           slot reported (presumably it set one -- not visible here). */
        return NULL;
    if (PyLong_Check(converted))
        return converted;
    /* The slot produced something other than a long: drop it. */
    Py_DECREF(converted);

cannot_convert:
    PyErr_SetString(StructError,
                    "cannot convert argument to long");
    return NULL;
}
/* Helper routine to get a Python integer and raise the appropriate error /* Helper routine to get a Python integer and raise the appropriate error
if it isn't one */ if it isn't one */
...@@ -123,33 +151,13 @@ static int ...@@ -123,33 +151,13 @@ static int
get_longlong(PyObject *v, LONG_LONG *p) get_longlong(PyObject *v, LONG_LONG *p)
{ {
LONG_LONG x; LONG_LONG x;
int v_needs_decref = 0;
if (PyInt_Check(v)) { v = get_pylong(v);
x = (LONG_LONG)PyInt_AS_LONG(v); if (v == NULL)
*p = x; return -1;
return 0;
}
if (!PyLong_Check(v)) {
PyNumberMethods *m = v->ob_type->tp_as_number;
if (m != NULL && m->nb_long != NULL) {
v = m->nb_long(v);
if (v == NULL)
return -1;
v_needs_decref = 1;
}
if (!PyLong_Check(v)) {
PyErr_SetString(StructError,
"cannot convert argument to long");
if (v_needs_decref)
Py_DECREF(v);
return -1;
}
}
assert(PyLong_Check(v)); assert(PyLong_Check(v));
x = PyLong_AsLongLong(v); x = PyLong_AsLongLong(v);
if (v_needs_decref) Py_DECREF(v);
Py_DECREF(v);
if (x == (LONG_LONG)-1 && PyErr_Occurred()) if (x == (LONG_LONG)-1 && PyErr_Occurred())
return -1; return -1;
*p = x; *p = x;
...@@ -162,39 +170,13 @@ static int ...@@ -162,39 +170,13 @@ static int
get_ulonglong(PyObject *v, unsigned LONG_LONG *p) get_ulonglong(PyObject *v, unsigned LONG_LONG *p)
{ {
unsigned LONG_LONG x; unsigned LONG_LONG x;
int v_needs_decref = 0;
if (PyInt_Check(v)) { v = get_pylong(v);
long i = PyInt_AS_LONG(v); if (v == NULL)
if (i < 0) { return -1;
PyErr_SetString(StructError, "can't convert negative "
"int to unsigned");
return -1;
}
x = (unsigned LONG_LONG)i;
*p = x;
return 0;
}
if (!PyLong_Check(v)) {
PyNumberMethods *m = v->ob_type->tp_as_number;
if (m != NULL && m->nb_long != NULL) {
v = m->nb_long(v);
if (v == NULL)
return -1;
v_needs_decref = 1;
}
if (!PyLong_Check(v)) {
PyErr_SetString(StructError,
"cannot convert argument to long");
if (v_needs_decref)
Py_DECREF(v);
return -1;
}
}
assert(PyLong_Check(v)); assert(PyLong_Check(v));
x = PyLong_AsUnsignedLongLong(v); x = PyLong_AsUnsignedLongLong(v);
if (v_needs_decref) Py_DECREF(v);
Py_DECREF(v);
if (x == (unsigned LONG_LONG)-1 && PyErr_Occurred()) if (x == (unsigned LONG_LONG)-1 && PyErr_Occurred())
return -1; return -1;
*p = x; *p = x;
...@@ -500,7 +482,7 @@ typedef struct _formatdef { ...@@ -500,7 +482,7 @@ typedef struct _formatdef {
TYPE is one of char, byte, ubyte, etc. TYPE is one of char, byte, ubyte, etc.
*/ */
/* Native mode routines. */ /* Native mode routines. ****************************************************/
static PyObject * static PyObject *
nu_char(const char *p, const formatdef *f) nu_char(const char *p, const formatdef *f)
...@@ -797,6 +779,8 @@ static formatdef native_table[] = { ...@@ -797,6 +779,8 @@ static formatdef native_table[] = {
{0} {0}
}; };
/* Big-endian routines. *****************************************************/
static PyObject * static PyObject *
bu_int(const char *p, const formatdef *f) bu_int(const char *p, const formatdef *f)
{ {
...@@ -825,6 +809,24 @@ bu_uint(const char *p, const formatdef *f) ...@@ -825,6 +809,24 @@ bu_uint(const char *p, const formatdef *f)
return PyInt_FromLong((long)x); return PyInt_FromLong((long)x);
} }
/* Standard-mode big-endian 'q': build a Python long from the 8 bytes at p,
   read as a signed value.  f is not used. */
static PyObject *
bu_longlong(const char *p, const formatdef *f)
{
    const unsigned char *bytes = (const unsigned char *)p;

    return _PyLong_FromByteArray(bytes, 8,
                                 0,  /* little_endian flag off: big-endian */
                                 1   /* is_signed flag on */);
}
/* Standard-mode big-endian 'Q': build a Python long from the 8 bytes at p,
   read as an unsigned value.  f is not used. */
static PyObject *
bu_ulonglong(const char *p, const formatdef *f)
{
    const unsigned char *bytes = (const unsigned char *)p;

    return _PyLong_FromByteArray(bytes, 8,
                                 0,  /* little_endian flag off: big-endian */
                                 0   /* is_signed flag off: unsigned */);
}
static PyObject * static PyObject *
bu_float(const char *p, const formatdef *f) bu_float(const char *p, const formatdef *f)
{ {
...@@ -867,6 +869,34 @@ bp_uint(char *p, PyObject *v, const formatdef *f) ...@@ -867,6 +869,34 @@ bp_uint(char *p, PyObject *v, const formatdef *f)
return 0; return 0;
} }
/* Standard-mode big-endian 'q': store v into the 8 bytes at p as a signed
   value.  v is first converted with get_pylong(); if that conversion fails
   -1 is returned, otherwise the result of _PyLong_AsByteArray is returned.
   f is not used. */
static int
bp_longlong(char *p, PyObject *v, const formatdef *f)
{
    int res;

    v = get_pylong(v);
    /* get_pylong() returns NULL when v cannot be converted to a long;
       passing that on to _PyLong_AsByteArray / Py_DECREF would
       dereference a NULL pointer. */
    if (v == NULL)
        return -1;
    res = _PyLong_AsByteArray((PyLongObject *)v,
                              (unsigned char *)p,
                              8,
                              0, /* little_endian */
                              1  /* signed */);
    Py_DECREF(v);
    return res;
}
/* Standard-mode big-endian 'Q': store v into the 8 bytes at p as an
   unsigned value.  v is first converted with get_pylong(); if that
   conversion fails -1 is returned, otherwise the result of
   _PyLong_AsByteArray is returned.  f is not used. */
static int
bp_ulonglong(char *p, PyObject *v, const formatdef *f)
{
    int res;

    v = get_pylong(v);
    /* get_pylong() returns NULL when v cannot be converted to a long;
       passing that on to _PyLong_AsByteArray / Py_DECREF would
       dereference a NULL pointer. */
    if (v == NULL)
        return -1;
    res = _PyLong_AsByteArray((PyLongObject *)v,
                              (unsigned char *)p,
                              8,
                              0, /* little_endian */
                              0  /* signed */);
    Py_DECREF(v);
    return res;
}
static int static int
bp_float(char *p, PyObject *v, const formatdef *f) bp_float(char *p, PyObject *v, const formatdef *f)
{ {
...@@ -904,11 +934,15 @@ static formatdef bigendian_table[] = { ...@@ -904,11 +934,15 @@ static formatdef bigendian_table[] = {
{'I', 4, 0, bu_uint, bp_uint}, {'I', 4, 0, bu_uint, bp_uint},
{'l', 4, 0, bu_int, bp_int}, {'l', 4, 0, bu_int, bp_int},
{'L', 4, 0, bu_uint, bp_uint}, {'L', 4, 0, bu_uint, bp_uint},
{'q', 8, 0, bu_longlong, bp_longlong},
{'Q', 8, 0, bu_ulonglong, bp_ulonglong},
{'f', 4, 0, bu_float, bp_float}, {'f', 4, 0, bu_float, bp_float},
{'d', 8, 0, bu_double, bp_double}, {'d', 8, 0, bu_double, bp_double},
{0} {0}
}; };
/* Little-endian routines. *****************************************************/
static PyObject * static PyObject *
lu_int(const char *p, const formatdef *f) lu_int(const char *p, const formatdef *f)
{ {
...@@ -937,6 +971,24 @@ lu_uint(const char *p, const formatdef *f) ...@@ -937,6 +971,24 @@ lu_uint(const char *p, const formatdef *f)
return PyInt_FromLong((long)x); return PyInt_FromLong((long)x);
} }
/* Standard-mode little-endian 'q': build a Python long from the 8 bytes at
   p, read as a signed value.  f is not used. */
static PyObject *
lu_longlong(const char *p, const formatdef *f)
{
    const unsigned char *bytes = (const unsigned char *)p;

    return _PyLong_FromByteArray(bytes, 8,
                                 1,  /* little_endian flag on */
                                 1   /* is_signed flag on */);
}
/* Standard-mode little-endian 'Q': build a Python long from the 8 bytes at
   p, read as an unsigned value.  f is not used. */
static PyObject *
lu_ulonglong(const char *p, const formatdef *f)
{
    const unsigned char *bytes = (const unsigned char *)p;

    return _PyLong_FromByteArray(bytes, 8,
                                 1,  /* little_endian flag on */
                                 0   /* is_signed flag off: unsigned */);
}
static PyObject * static PyObject *
lu_float(const char *p, const formatdef *f) lu_float(const char *p, const formatdef *f)
{ {
...@@ -979,6 +1031,34 @@ lp_uint(char *p, PyObject *v, const formatdef *f) ...@@ -979,6 +1031,34 @@ lp_uint(char *p, PyObject *v, const formatdef *f)
return 0; return 0;
} }
/* Standard-mode little-endian 'q': store v into the 8 bytes at p as a
   signed value.  v is first converted with get_pylong(); if that conversion
   fails -1 is returned, otherwise the result of _PyLong_AsByteArray is
   returned.  f is not used. */
static int
lp_longlong(char *p, PyObject *v, const formatdef *f)
{
    int res;

    v = get_pylong(v);
    /* get_pylong() returns NULL when v cannot be converted to a long;
       passing that on to _PyLong_AsByteArray / Py_DECREF would
       dereference a NULL pointer. */
    if (v == NULL)
        return -1;
    res = _PyLong_AsByteArray((PyLongObject*)v,
                              (unsigned char *)p,
                              8,
                              1, /* little_endian */
                              1  /* signed */);
    Py_DECREF(v);
    return res;
}
/* Standard-mode little-endian 'Q': store v into the 8 bytes at p as an
   unsigned value.  v is first converted with get_pylong(); if that
   conversion fails -1 is returned, otherwise the result of
   _PyLong_AsByteArray is returned.  f is not used. */
static int
lp_ulonglong(char *p, PyObject *v, const formatdef *f)
{
    int res;

    v = get_pylong(v);
    /* get_pylong() returns NULL when v cannot be converted to a long;
       passing that on to _PyLong_AsByteArray / Py_DECREF would
       dereference a NULL pointer. */
    if (v == NULL)
        return -1;
    res = _PyLong_AsByteArray((PyLongObject*)v,
                              (unsigned char *)p,
                              8,
                              1, /* little_endian */
                              0  /* signed */);
    Py_DECREF(v);
    return res;
}
static int static int
lp_float(char *p, PyObject *v, const formatdef *f) lp_float(char *p, PyObject *v, const formatdef *f)
{ {
...@@ -1016,6 +1096,8 @@ static formatdef lilendian_table[] = { ...@@ -1016,6 +1096,8 @@ static formatdef lilendian_table[] = {
{'I', 4, 0, lu_uint, lp_uint}, {'I', 4, 0, lu_uint, lp_uint},
{'l', 4, 0, lu_int, lp_int}, {'l', 4, 0, lu_int, lp_int},
{'L', 4, 0, lu_uint, lp_uint}, {'L', 4, 0, lu_uint, lp_uint},
{'q', 8, 0, lu_longlong, lp_longlong},
{'Q', 8, 0, lu_ulonglong, lp_ulonglong},
{'f', 4, 0, lu_float, lp_float}, {'f', 4, 0, lu_float, lp_float},
{'d', 8, 0, lu_double, lp_double}, {'d', 8, 0, lu_double, lp_double},
{0} {0}
......
...@@ -364,20 +364,33 @@ _PyLong_AsByteArray(PyLongObject* v, ...@@ -364,20 +364,33 @@ _PyLong_AsByteArray(PyLongObject* v,
accumbits = 0; accumbits = 0;
carry = do_twos_comp ? 1 : 0; carry = do_twos_comp ? 1 : 0;
for (i = 0; i < ndigits; ++i) { for (i = 0; i < ndigits; ++i) {
unsigned int oldaccumbits = accumbits;
twodigits thisdigit = v->ob_digit[i]; twodigits thisdigit = v->ob_digit[i];
if (do_twos_comp) { if (do_twos_comp) {
thisdigit = (thisdigit ^ MASK) + carry; thisdigit = (thisdigit ^ MASK) + carry;
carry = thisdigit >> SHIFT; carry = thisdigit >> SHIFT;
thisdigit &= MASK; thisdigit &= MASK;
} }
if (i < ndigits - 1)
accumbits += SHIFT;
else {
/* The most-significant digit may be partly empty. */
twodigits bitmask = 1 << (SHIFT - 1);
twodigits signbit = do_twos_comp << (SHIFT - 1);
unsigned int nsignbits = 0;
while ((thisdigit & bitmask) == signbit && bitmask) {
++nsignbits;
bitmask >>= 1;
signbit >>= 1;
}
accumbits += SHIFT - nsignbits;
}
/* Because we're going LSB to MSB, thisdigit is more /* Because we're going LSB to MSB, thisdigit is more
significant than what's already in accum, so needs to be significant than what's already in accum, so needs to be
prepended to accum. */ prepended to accum. */
accum |= thisdigit << accumbits; accum |= thisdigit << oldaccumbits;
accumbits += SHIFT;
/* Store as many bytes as possible. */ /* Store as many bytes as possible. */
assert(accumbits >= 8); while (accumbits >= 8) {
do {
if (j >= n) if (j >= n)
goto Overflow; goto Overflow;
++j; ++j;
...@@ -385,13 +398,13 @@ _PyLong_AsByteArray(PyLongObject* v, ...@@ -385,13 +398,13 @@ _PyLong_AsByteArray(PyLongObject* v,
p += pincr; p += pincr;
accumbits -= 8; accumbits -= 8;
accum >>= 8; accum >>= 8;
} while (accumbits >= 8); }
} }
/* Store the straggler (if any). */ /* Store the straggler (if any). */
assert(accumbits < 8); assert(accumbits < 8);
assert(carry == 0); /* else do_twos_comp and *every* digit was 0 */ assert(carry == 0); /* else do_twos_comp and *every* digit was 0 */
if (accum) { if (accumbits > 0) {
if (j >= n) if (j >= n)
goto Overflow; goto Overflow;
++j; ++j;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment