Commit 4d6381df, authored by Jeremy Hylton

Fix UCNs on machines with >= 32-bit longs

originally submitted by Bill Tutt

Note: This code is actually going to be replaced in 2.0 by /F's new
database.  Until then, this patch keeps the test suite working.
parent 1962fb59
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
* perfect_hash.py: * perfect_hash.py:
* http://starship.python.net/crew/amk/python/code/perfect-hash.html * http://starship.python.net/crew/amk/python/code/perfect-hash.html
* *
* Generated on: Wed Jun 28 03:34:07 2000 * Generated on: Fri Jul 14 08:00:58 2000
*/ */
#define k_cHashElements 18836 #define k_cHashElements 18836
...@@ -26,20 +26,36 @@ static long f1(const char *key, unsigned int cch) ...@@ -26,20 +26,36 @@ static long f1(const char *key, unsigned int cch)
{ {
register int len; register int len;
register unsigned char *p; register unsigned char *p;
register long x; register unsigned long x;
len = cch; len = cch;
p = (unsigned char *) key; p = (unsigned char *) key;
x = 1694245428; x = 0x64fc2234;
while (--len >= 0) while (--len >= 0)
x = (1000003*x) ^ toupper(*(p++)); {
/* (1000003 * x) ^ toupper(*(p++))
* translated to handle > 32 bit longs
*/
x = (0xf4243 * x);
x = x & 0xFFFFFFFF;
x = x ^ toupper(*(p++));
}
x ^= cch + 10; x ^= cch + 10;
if (x == -1) if (x == 0xFFFFFFFF)
x = -2; x = 0xfffffffe;
if (x & 0x80000000)
{
/* Emulate 32-bit signed (2's complement) modulo operation */
x = (~x & 0xFFFFFFFF) + 1;
x %= k_cHashElements;
if (x != 0)
{
x = x + (~k_cHashElements & 0xFFFFFFFF) + 1;
x = (~x & 0xFFFFFFFF) + 1;
}
}
else
x %= k_cHashElements; x %= k_cHashElements;
/* ensure the returned value is positive so we mimic Python's % operator */
if (x < 0)
x += k_cHashElements;
return x; return x;
} }
...@@ -48,20 +64,36 @@ static long f2(const char *key, unsigned int cch) ...@@ -48,20 +64,36 @@ static long f2(const char *key, unsigned int cch)
{ {
register int len; register int len;
register unsigned char *p; register unsigned char *p;
register long x; register unsigned long x;
len = cch; len = cch;
p = (unsigned char *) key; p = (unsigned char *) key;
x = -1917331657; x = 0x8db7d737;
while (--len >= 0) while (--len >= 0)
x = (1000003*x) ^ toupper(*(p++)); {
/* (1000003 * x) ^ toupper(*(p++))
* translated to handle > 32 bit longs
*/
x = (0xf4243 * x);
x = x & 0xFFFFFFFF;
x = x ^ toupper(*(p++));
}
x ^= cch + 10; x ^= cch + 10;
if (x == -1) if (x == 0xFFFFFFFF)
x = -2; x = 0xfffffffe;
if (x & 0x80000000)
{
/* Emulate 32-bit signed (2's complement) modulo operation */
x = (~x & 0xFFFFFFFF) + 1;
x %= k_cHashElements;
if (x != 0)
{
x = x + (~k_cHashElements & 0xFFFFFFFF) + 1;
x = (~x & 0xFFFFFFFF) + 1;
}
}
else
x %= k_cHashElements; x %= k_cHashElements;
/* ensure the returned value is positive so we mimic Python's % operator */
if (x < 0)
x += k_cHashElements;
return x; return x;
} }
......
...@@ -12,8 +12,8 @@ import perfect_hash ...@@ -12,8 +12,8 @@ import perfect_hash
# These variables determine which hash function is tried first. # These variables determine which hash function is tried first.
# Yields a multiple of 1.7875 for UnicodeData.txt on 2000/06/24/ # Yields a multiple of 1.7875 for UnicodeData.txt on 2000/06/24/
f1Seed = 1694245428 f1Seed = 0x64fc2234
f2Seed = -1917331657 f2Seed = 0x8db7d737
# Maximum allowed multipler, if this isn't None then instead of continually # Maximum allowed multipler, if this isn't None then instead of continually
# increasing C, it resets it back to initC to keep searching for # increasing C, it resets it back to initC to keep searching for
......
...@@ -73,25 +73,29 @@ class Hash: ...@@ -73,25 +73,29 @@ class Hash:
key = str(key) key = str(key)
if self.caseInsensitive: if self.caseInsensitive:
key = string.upper(key) key = string.upper(key)
x = perfhash.hash(self.seed, len(self.junk), key) % self.N x = perfhash.hash(self.seed, len(self.junk), key, self.N)
#h = hash(self.junk + key) % self.N
#assert x == h
return x return x
def generate_code(self): def generate_code(self):
s = """{ s = """{
register int len; register int len;
register unsigned char *p; register unsigned char *p;
register long x; register unsigned long x;
len = cch; len = cch;
p = (unsigned char *) key; p = (unsigned char *) key;
x = %(junkSeed)d; x = %(junkSeed)s;
while (--len >= 0) while (--len >= 0)
x = (1000003*x) ^ """ % \ {
/* (1000003 * x) ^ toupper(*(p++))
* translated to handle > 32 bit longs
*/
x = (0xf4243 * x);
x = x & 0xFFFFFFFF;
x = x ^ """ % \
{ {
"lenJunk" : len(self.junk), "lenJunk" : len(self.junk),
"junkSeed" : self.seed, "junkSeed" : hex(self.seed),
} }
if self.caseInsensitive: if self.caseInsensitive:
...@@ -99,20 +103,29 @@ class Hash: ...@@ -99,20 +103,29 @@ class Hash:
else: else:
s = s + "*(p++);" s = s + "*(p++);"
s = s + """ s = s + """
}
x ^= cch + %(lenJunk)d; x ^= cch + %(lenJunk)d;
if (x == -1) if (x == 0xFFFFFFFF)
x = -2; x = 0xfffffffe;
if (x & 0x80000000)
{
/* Emulate 32-bit signed (2's complement) modulo operation */
x = (~x & 0xFFFFFFFF) + 1;
x %%= k_cHashElements;
if (x != 0)
{
x = x + (~k_cHashElements & 0xFFFFFFFF) + 1;
x = (~x & 0xFFFFFFFF) + 1;
}
}
else
x %%= k_cHashElements; x %%= k_cHashElements;
/* ensure the returned value is positive so we mimic Python's %% operator */
if (x < 0)
x += k_cHashElements;
return x; return x;
} }
""" % { "lenJunk" : len(self.junk), """ % { "lenJunk" : len(self.junk),
"junkSeed" : self.seed, } "junkSeed" : hex(self.seed), }
return s return s
WHITE, GREY, BLACK = 0,1,2 WHITE, GREY, BLACK = 0,1,2
class Graph: class Graph:
"""Graph class. This class isn't particularly efficient or general, """Graph class. This class isn't particularly efficient or general,
...@@ -139,8 +152,8 @@ class Graph: ...@@ -139,8 +152,8 @@ class Graph:
value 'value'""" value 'value'"""
if vertex1 > vertex2: vertex1, vertex2 = vertex2, vertex1 if vertex1 > vertex2: vertex1, vertex2 = vertex2, vertex1
# if self.edges.has_key( (vertex1, vertex2) ): if self.edges.has_key( (vertex1, vertex2) ):
# raise ValueError, 'Collision: vertices already connected' raise ValueError, 'Collision: vertices already connected'
self.edges[ (vertex1, vertex2) ] = value self.edges[ (vertex1, vertex2) ] = value
# Add vertices to each other's reachable list # Add vertices to each other's reachable list
...@@ -341,8 +354,8 @@ typedef struct %(structName)s ...@@ -341,8 +354,8 @@ typedef struct %(structName)s
""" % (self.cHashElements, self.cchMax, self.cKeys) """ % (self.cHashElements, self.cchMax, self.cKeys)
code = code + """ code = code + """
static const %s G[k_cHashElements]; staticforward const %s G[k_cHashElements];
static const %s %s[k_cKeys]; staticforward const %s %s[k_cKeys];
""" % (self.type, dataArrayType, dataArrayName) """ % (self.type, dataArrayType, dataArrayName)
code = code + """ code = code + """
...@@ -553,7 +566,7 @@ def generate_hash(keys, caseInsensitive=0, ...@@ -553,7 +566,7 @@ def generate_hash(keys, caseInsensitive=0,
# edge. # edge.
for k, v in keys: for k, v in keys:
h1 = f1(k) ; h2 = f2(k) h1 = f1(k) ; h2 = f2(k)
G.connect( h1,h2, v) G.connect( h1, h2, v)
# Check if the resulting graph is acyclic; if it is, # Check if the resulting graph is acyclic; if it is,
# we're done with step 1. # we're done with step 1.
...@@ -598,8 +611,9 @@ def generate_hash(keys, caseInsensitive=0, ...@@ -598,8 +611,9 @@ def generate_hash(keys, caseInsensitive=0,
sys.stderr.write('Found perfect hash function!\n') sys.stderr.write('Found perfect hash function!\n')
sys.stderr.write('\nIn order to regenerate this hash function, \n') sys.stderr.write('\nIn order to regenerate this hash function, \n')
sys.stderr.write('you need to pass these following values back in:\n') sys.stderr.write('you need to pass these following values back in:\n')
sys.stderr.write('f1 seed: %s\n' % repr(f1.seed)) sys.stderr.write('f1 seed: %s\n' % hex(f1.seed))
sys.stderr.write('f2 seed: %s\n' % repr(f2.seed)) sys.stderr.write('f2 seed: %s\n' % hex(f2.seed))
sys.stderr.write('initial multipler: %s\n' % c) sys.stderr.write('initial multipler: %s\n' % c)
return PerfectHash(cchMaxKey, f1, f2, G, N, len(keys), maxHashValue) return PerfectHash(cchMaxKey, f1, f2, G, N, len(keys), maxHashValue)
...@@ -5,11 +5,13 @@ static PyObject * hashFunction(PyObject *self, PyObject *args, PyObject *kw) ...@@ -5,11 +5,13 @@ static PyObject * hashFunction(PyObject *self, PyObject *args, PyObject *kw)
PyStringObject *a; PyStringObject *a;
register int len; register int len;
register unsigned char *p; register unsigned char *p;
register long x; register unsigned long x;
long lSeed; unsigned long ulSeed;
unsigned long cchSeed; unsigned long cchSeed;
unsigned long cHashElements;
if (!PyArg_ParseTuple(args, "iiO:hash", &lSeed, &cchSeed, &a)) if (!PyArg_ParseTuple(args, "llOl:hash",
&ulSeed, &cchSeed, &a, &cHashElements))
return NULL; return NULL;
if (!PyString_Check(a)) if (!PyString_Check(a))
{ {
...@@ -19,13 +21,35 @@ static PyObject * hashFunction(PyObject *self, PyObject *args, PyObject *kw) ...@@ -19,13 +21,35 @@ static PyObject * hashFunction(PyObject *self, PyObject *args, PyObject *kw)
len = a->ob_size; len = a->ob_size;
p = (unsigned char *) a->ob_sval; p = (unsigned char *) a->ob_sval;
x = lSeed; x = ulSeed;
while (--len >= 0) while (--len >= 0)
x = (1000003*x) ^ *p++; {
/* (1000003 * x) ^ *p++
* translated to handle > 32 bit longs
*/
x = (0xf4243 * x);
x = x & 0xFFFFFFFF;
x = x ^ *p++;
}
x ^= a->ob_size + cchSeed; x ^= a->ob_size + cchSeed;
if (x == -1) if (x == 0xFFFFFFFF)
x = -2; x = 0xfffffffe;
return PyInt_FromLong(x); if (x & 0x80000000)
{
/* Emulate Python 32-bit signed (2's complement)
* modulo operation
*/
x = (~x & 0xFFFFFFFF) + 1;
x %= cHashElements;
if (x != 0)
{
x = x + (~cHashElements & 0xFFFFFFFF) + 1;
x = (~x & 0xFFFFFFFF) + 1;
}
}
else
x %= cHashElements;
return PyInt_FromLong((long)x);
} }
static PyObject * calcSeed(PyObject *self, PyObject *args, PyObject *kw) static PyObject * calcSeed(PyObject *self, PyObject *args, PyObject *kw)
...@@ -33,7 +57,7 @@ static PyObject * calcSeed(PyObject *self, PyObject *args, PyObject *kw) ...@@ -33,7 +57,7 @@ static PyObject * calcSeed(PyObject *self, PyObject *args, PyObject *kw)
PyStringObject *a; PyStringObject *a;
register int len; register int len;
register unsigned char *p; register unsigned char *p;
register long x; register unsigned long x;
if (!PyString_Check(args)) if (!PyString_Check(args))
{ {
...@@ -45,10 +69,17 @@ static PyObject * calcSeed(PyObject *self, PyObject *args, PyObject *kw) ...@@ -45,10 +69,17 @@ static PyObject * calcSeed(PyObject *self, PyObject *args, PyObject *kw)
len = a->ob_size; len = a->ob_size;
p = (unsigned char *) a->ob_sval; p = (unsigned char *) a->ob_sval;
x = *p << 7; x = (*p << 7) & 0xFFFFFFFF;
while (--len >= 0) while (--len >= 0)
x = (1000003*x) ^ *p++; {
return PyInt_FromLong(x); /* (1000003 * x) ^ *p++
* translated to handle > 32 bit longs
*/
x = (0xf4243 * x);
x = x & 0xFFFFFFFF;
x = x ^ *p++;
}
return PyInt_FromLong((long)x);
} }
...@@ -68,5 +99,16 @@ void initperfhash(void) ...@@ -68,5 +99,16 @@ void initperfhash(void)
m = Py_InitModule4("perfhash", hashMethods, m = Py_InitModule4("perfhash", hashMethods,
NULL, NULL, PYTHON_API_VERSION); NULL, NULL, PYTHON_API_VERSION);
if ( m == NULL ) if ( m == NULL )
Py_FatalError("can't initialize module hashModule"); Py_FatalError("can't initialize module perfhash");
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment