Commit 2a61e742, authored by Guido van Rossum

String interning.

üst ee5cf9b6
...@@ -87,6 +87,9 @@ typedef struct { ...@@ -87,6 +87,9 @@ typedef struct {
int ma_fill; int ma_fill;
int ma_used; int ma_used;
int ma_size; int ma_size;
#ifdef INTERN_STRINGS
int ma_fast;
#endif
mappingentry *ma_table; mappingentry *ma_table;
} mappingobject; } mappingobject;
...@@ -106,6 +109,9 @@ newmappingobject() ...@@ -106,6 +109,9 @@ newmappingobject()
mp->ma_table = NULL; mp->ma_table = NULL;
mp->ma_fill = 0; mp->ma_fill = 0;
mp->ma_used = 0; mp->ma_used = 0;
#ifdef INTERN_STRINGS
mp->ma_fast = 1;
#endif
return (object *)mp; return (object *)mp;
} }
...@@ -163,17 +169,40 @@ lookmapping(mp, key, hash) ...@@ -163,17 +169,40 @@ lookmapping(mp, key, hash)
unsigned long sum; unsigned long sum;
int incr; int incr;
int size; int size;
#ifdef INTERN_STRINGS
int fast;
#endif
ep = &mp->ma_table[(unsigned long)hash%mp->ma_size]; ep = &mp->ma_table[(unsigned long)hash%mp->ma_size];
ekey = ep->me_key; ekey = ep->me_key;
if (ekey == NULL) if (ekey == NULL)
return ep; return ep;
#ifdef INTERN_STRINGS
if ((fast = mp->ma_fast)) {
object *ikey;
if (!is_stringobject(key) ||
(ikey = ((stringobject *)key)->ob_sinterned) == NULL)
fast = 0;
else
key = ikey;
}
#endif
if (ekey == dummy) if (ekey == dummy)
freeslot = ep; freeslot = ep;
else if (ep->me_hash == hash && cmpobject(ekey, key) == 0) else {
#ifdef INTERN_STRINGS
if (fast) {
if (ekey == key)
return ep; return ep;
}
else else
#endif
{
if (ep->me_hash == hash && cmpobject(ekey, key) == 0)
return ep;
}
freeslot = NULL; freeslot = NULL;
}
size = mp->ma_size; size = mp->ma_size;
sum = hash; sum = hash;
...@@ -184,6 +213,36 @@ lookmapping(mp, key, hash) ...@@ -184,6 +213,36 @@ lookmapping(mp, key, hash)
end = mp->ma_table + size; end = mp->ma_table + size;
#ifdef INTERN_STRINGS
if (fast) {
if (freeslot == NULL) {
for (;;) {
ep += incr;
if (ep >= end)
ep -= size;
ekey = ep->me_key;
if (ekey == NULL || ekey == key)
return ep;
if (ekey == dummy) {
freeslot = ep;
break;
}
}
}
for (;;) {
ep += incr;
if (ep >= end)
ep -= size;
ekey = ep->me_key;
if (ekey == NULL)
return freeslot;
if (ekey == key)
return ep;
}
}
#endif
if (freeslot == NULL) { if (freeslot == NULL) {
for (;;) { for (;;) {
ep += incr; ep += incr;
...@@ -339,13 +398,35 @@ mappinginsert(op, key, value) ...@@ -339,13 +398,35 @@ mappinginsert(op, key, value)
err_badcall(); err_badcall();
return -1; return -1;
} }
mp = (mappingobject *)op;
#ifdef CACHE_HASH #ifdef CACHE_HASH
if (!is_stringobject(key) || (hash = ((stringobject *) key)->ob_shash) == -1) if (is_stringobject(key)) {
#ifdef INTERN_STRINGS
if (((stringobject *)key)->ob_sinterned != NULL) {
key = ((stringobject *)key)->ob_sinterned;
hash = ((stringobject *)key)->ob_shash;
}
else
#endif #endif
{
hash = ((stringobject *)key)->ob_shash;
if (hash == -1)
hash = hashobject(key);
#ifdef INTERN_STRINGS
mp->ma_fast = 0;
#endif
}
}
else
#endif
{
hash = hashobject(key); hash = hashobject(key);
if (hash == -1) if (hash == -1)
return -1; return -1;
mp = (mappingobject *)op; #ifdef INTERN_STRINGS
mp->ma_fast = 0;
#endif
}
/* if fill >= 2/3 size, resize */ /* if fill >= 2/3 size, resize */
if (mp->ma_fill*3 >= mp->ma_size*2) { if (mp->ma_fill*3 >= mp->ma_size*2) {
if (mappingresize(mp) != 0) { if (mappingresize(mp) != 0) {
...@@ -907,16 +988,22 @@ setattro(v, name, value) ...@@ -907,16 +988,22 @@ setattro(v, name, value)
object *name; object *name;
object *value; object *value;
{ {
int err;
INCREF(name);
PyString_InternInPlace(&name);
if (v->ob_type->tp_setattro != NULL) if (v->ob_type->tp_setattro != NULL)
return (*v->ob_type->tp_setattro)(v, name, value); err = (*v->ob_type->tp_setattro)(v, name, value);
else {
if (name != last_name_object) { if (name != last_name_object) {
XDECREF(last_name_object); XDECREF(last_name_object);
INCREF(name); INCREF(name);
last_name_object = name; last_name_object = name;
last_name_char = getstringvalue(name); last_name_char = getstringvalue(name);
} }
return setattr(v, last_name_char, value); err = setattr(v, last_name_char, value);
}
DECREF(name);
return err;
} }
object * object *
...@@ -931,6 +1018,7 @@ dictlookup(v, key) ...@@ -931,6 +1018,7 @@ dictlookup(v, key)
last_name_char = NULL; last_name_char = NULL;
return NULL; return NULL;
} }
PyString_InternInPlace(&last_name_object);
last_name_char = getstringvalue(last_name_object); last_name_char = getstringvalue(last_name_object);
} }
return mappinglookup(v, last_name_object); return mappinglookup(v, last_name_object);
...@@ -949,6 +1037,7 @@ dictinsert(v, key, item) ...@@ -949,6 +1037,7 @@ dictinsert(v, key, item)
last_name_char = NULL; last_name_char = NULL;
return -1; return -1;
} }
PyString_InternInPlace(&last_name_object);
last_name_char = getstringvalue(last_name_object); last_name_char = getstringvalue(last_name_object);
} }
return mappinginsert(v, last_name_object, item); return mappinginsert(v, last_name_object, item);
......
...@@ -87,6 +87,9 @@ typedef struct { ...@@ -87,6 +87,9 @@ typedef struct {
int ma_fill; int ma_fill;
int ma_used; int ma_used;
int ma_size; int ma_size;
#ifdef INTERN_STRINGS
int ma_fast;
#endif
mappingentry *ma_table; mappingentry *ma_table;
} mappingobject; } mappingobject;
...@@ -106,6 +109,9 @@ newmappingobject() ...@@ -106,6 +109,9 @@ newmappingobject()
mp->ma_table = NULL; mp->ma_table = NULL;
mp->ma_fill = 0; mp->ma_fill = 0;
mp->ma_used = 0; mp->ma_used = 0;
#ifdef INTERN_STRINGS
mp->ma_fast = 1;
#endif
return (object *)mp; return (object *)mp;
} }
...@@ -163,17 +169,40 @@ lookmapping(mp, key, hash) ...@@ -163,17 +169,40 @@ lookmapping(mp, key, hash)
unsigned long sum; unsigned long sum;
int incr; int incr;
int size; int size;
#ifdef INTERN_STRINGS
int fast;
#endif
ep = &mp->ma_table[(unsigned long)hash%mp->ma_size]; ep = &mp->ma_table[(unsigned long)hash%mp->ma_size];
ekey = ep->me_key; ekey = ep->me_key;
if (ekey == NULL) if (ekey == NULL)
return ep; return ep;
#ifdef INTERN_STRINGS
if ((fast = mp->ma_fast)) {
object *ikey;
if (!is_stringobject(key) ||
(ikey = ((stringobject *)key)->ob_sinterned) == NULL)
fast = 0;
else
key = ikey;
}
#endif
if (ekey == dummy) if (ekey == dummy)
freeslot = ep; freeslot = ep;
else if (ep->me_hash == hash && cmpobject(ekey, key) == 0) else {
#ifdef INTERN_STRINGS
if (fast) {
if (ekey == key)
return ep; return ep;
}
else else
#endif
{
if (ep->me_hash == hash && cmpobject(ekey, key) == 0)
return ep;
}
freeslot = NULL; freeslot = NULL;
}
size = mp->ma_size; size = mp->ma_size;
sum = hash; sum = hash;
...@@ -184,6 +213,36 @@ lookmapping(mp, key, hash) ...@@ -184,6 +213,36 @@ lookmapping(mp, key, hash)
end = mp->ma_table + size; end = mp->ma_table + size;
#ifdef INTERN_STRINGS
if (fast) {
if (freeslot == NULL) {
for (;;) {
ep += incr;
if (ep >= end)
ep -= size;
ekey = ep->me_key;
if (ekey == NULL || ekey == key)
return ep;
if (ekey == dummy) {
freeslot = ep;
break;
}
}
}
for (;;) {
ep += incr;
if (ep >= end)
ep -= size;
ekey = ep->me_key;
if (ekey == NULL)
return freeslot;
if (ekey == key)
return ep;
}
}
#endif
if (freeslot == NULL) { if (freeslot == NULL) {
for (;;) { for (;;) {
ep += incr; ep += incr;
...@@ -339,13 +398,35 @@ mappinginsert(op, key, value) ...@@ -339,13 +398,35 @@ mappinginsert(op, key, value)
err_badcall(); err_badcall();
return -1; return -1;
} }
mp = (mappingobject *)op;
#ifdef CACHE_HASH #ifdef CACHE_HASH
if (!is_stringobject(key) || (hash = ((stringobject *) key)->ob_shash) == -1) if (is_stringobject(key)) {
#ifdef INTERN_STRINGS
if (((stringobject *)key)->ob_sinterned != NULL) {
key = ((stringobject *)key)->ob_sinterned;
hash = ((stringobject *)key)->ob_shash;
}
else
#endif #endif
{
hash = ((stringobject *)key)->ob_shash;
if (hash == -1)
hash = hashobject(key);
#ifdef INTERN_STRINGS
mp->ma_fast = 0;
#endif
}
}
else
#endif
{
hash = hashobject(key); hash = hashobject(key);
if (hash == -1) if (hash == -1)
return -1; return -1;
mp = (mappingobject *)op; #ifdef INTERN_STRINGS
mp->ma_fast = 0;
#endif
}
/* if fill >= 2/3 size, resize */ /* if fill >= 2/3 size, resize */
if (mp->ma_fill*3 >= mp->ma_size*2) { if (mp->ma_fill*3 >= mp->ma_size*2) {
if (mappingresize(mp) != 0) { if (mappingresize(mp) != 0) {
...@@ -907,16 +988,22 @@ setattro(v, name, value) ...@@ -907,16 +988,22 @@ setattro(v, name, value)
object *name; object *name;
object *value; object *value;
{ {
int err;
INCREF(name);
PyString_InternInPlace(&name);
if (v->ob_type->tp_setattro != NULL) if (v->ob_type->tp_setattro != NULL)
return (*v->ob_type->tp_setattro)(v, name, value); err = (*v->ob_type->tp_setattro)(v, name, value);
else {
if (name != last_name_object) { if (name != last_name_object) {
XDECREF(last_name_object); XDECREF(last_name_object);
INCREF(name); INCREF(name);
last_name_object = name; last_name_object = name;
last_name_char = getstringvalue(name); last_name_char = getstringvalue(name);
} }
return setattr(v, last_name_char, value); err = setattr(v, last_name_char, value);
}
DECREF(name);
return err;
} }
object * object *
...@@ -931,6 +1018,7 @@ dictlookup(v, key) ...@@ -931,6 +1018,7 @@ dictlookup(v, key)
last_name_char = NULL; last_name_char = NULL;
return NULL; return NULL;
} }
PyString_InternInPlace(&last_name_object);
last_name_char = getstringvalue(last_name_object); last_name_char = getstringvalue(last_name_object);
} }
return mappinglookup(v, last_name_object); return mappinglookup(v, last_name_object);
...@@ -949,6 +1037,7 @@ dictinsert(v, key, item) ...@@ -949,6 +1037,7 @@ dictinsert(v, key, item)
last_name_char = NULL; last_name_char = NULL;
return -1; return -1;
} }
PyString_InternInPlace(&last_name_object);
last_name_char = getstringvalue(last_name_object); last_name_char = getstringvalue(last_name_object);
} }
return mappinginsert(v, last_name_object, item); return mappinginsert(v, last_name_object, item);
......
...@@ -97,6 +97,9 @@ newsizedstringobject(str, size) ...@@ -97,6 +97,9 @@ newsizedstringobject(str, size)
op->ob_size = size; op->ob_size = size;
#ifdef CACHE_HASH #ifdef CACHE_HASH
op->ob_shash = -1; op->ob_shash = -1;
#endif
#ifdef INTERN_STRINGS
op->ob_sinterned = NULL;
#endif #endif
NEWREF(op); NEWREF(op);
if (str != NULL) if (str != NULL)
...@@ -144,6 +147,9 @@ newstringobject(str) ...@@ -144,6 +147,9 @@ newstringobject(str)
op->ob_size = size; op->ob_size = size;
#ifdef CACHE_HASH #ifdef CACHE_HASH
op->ob_shash = -1; op->ob_shash = -1;
#endif
#ifdef INTERN_STRINGS
op->ob_sinterned = NULL;
#endif #endif
NEWREF(op); NEWREF(op);
strcpy(op->ob_sval, str); strcpy(op->ob_sval, str);
...@@ -303,6 +309,9 @@ string_concat(a, bb) ...@@ -303,6 +309,9 @@ string_concat(a, bb)
op->ob_size = size; op->ob_size = size;
#ifdef CACHE_HASH #ifdef CACHE_HASH
op->ob_shash = -1; op->ob_shash = -1;
#endif
#ifdef INTERN_STRINGS
op->ob_sinterned = NULL;
#endif #endif
NEWREF(op); NEWREF(op);
memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size); memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
...@@ -335,6 +344,9 @@ string_repeat(a, n) ...@@ -335,6 +344,9 @@ string_repeat(a, n)
op->ob_size = size; op->ob_size = size;
#ifdef CACHE_HASH #ifdef CACHE_HASH
op->ob_shash = -1; op->ob_shash = -1;
#endif
#ifdef INTERN_STRINGS
op->ob_sinterned = NULL;
#endif #endif
NEWREF(op); NEWREF(op);
for (i = 0; i < size; i += a->ob_size) for (i = 0; i < size; i += a->ob_size)
...@@ -462,6 +474,13 @@ typeobject Stringtype = { ...@@ -462,6 +474,13 @@ typeobject Stringtype = {
&string_as_sequence, /*tp_as_sequence*/ &string_as_sequence, /*tp_as_sequence*/
0, /*tp_as_mapping*/ 0, /*tp_as_mapping*/
(hashfunc)string_hash, /*tp_hash*/ (hashfunc)string_hash, /*tp_hash*/
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_xxx3*/
0, /*tp_xxx4*/
0, /*tp_doc*/
}; };
void void
...@@ -928,3 +947,59 @@ formatstring(format, args) ...@@ -928,3 +947,59 @@ formatstring(format, args)
DECREF(args); DECREF(args);
return NULL; return NULL;
} }
#ifdef INTERN_STRINGS
/* Table of interned strings.  Starts out NULL and is created on first use
   by PyString_InternInPlace(), which stores each interned string in it as
   both key and value (plus a None -> None entry added at creation). */
static PyObject *interned;
/* Intern the string object *p in place.

   On entry *p must point to a string object; NULL or a non-string is a
   fatal error.  If the string already records an interned representative,
   or the intern table already holds an entry for it, *p is redirected to
   that object: a new reference to the representative is stored in *p and
   the reference to the original string is released.  Otherwise the string
   itself is entered into the table and becomes its own representative.

   Interning is best-effort.  This function returns void and so cannot
   report failure; if the table cannot be created or updated, *p is left
   as it was and any exception raised along the way is cleared, so that no
   error is left pending for an unrelated caller to trip over. */
void
PyString_InternInPlace(p)
	PyObject **p;
{
	register PyStringObject *s = (PyStringObject *)(*p);
	PyObject *t;

	if (s == NULL || !PyString_Check(s))
		Py_FatalError("PyString_InternInPlace: strings only please!");

	/* The string already knows its interned representative. */
	if ((t = s->ob_sinterned) != NULL) {
		if (t == (PyObject *)s)
			return;		/* it is its own representative */
		Py_INCREF(t);
		*p = t;
		Py_DECREF(s);
		return;
	}

	/* Create the intern table on first use. */
	if (interned == NULL) {
		interned = PyDict_New();
		if (interned == NULL) {
			/* Don't leave an exception behind: we cannot report it. */
			PyErr_Clear();
			return;
		}
		/* Force slow lookups: */
		if (PyDict_SetItem(interned, Py_None, Py_None) != 0)
			PyErr_Clear();	/* same reasoning as above */
	}

	/* The table already has an entry for this key: remember it on the
	   string and hand it back in place of the original. */
	if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
		Py_INCREF(t);
		*p = s->ob_sinterned = t;
		Py_DECREF(s);
		return;
	}

	/* First time this key is seen: the string becomes its own
	   representative, stored as both key and value. */
	t = (PyObject *)s;
	if (PyDict_SetItem(interned, t, t) == 0) {
		s->ob_sinterned = t;
		return;
	}

	/* Insertion failed; leave the string uninterned and drop the error. */
	PyErr_Clear();
}
/* Build a string object from the C string cp and pass it through
   PyString_InternInPlace().  Returns NULL if PyString_FromString() does;
   otherwise returns whatever object the interning step left in place. */
PyObject *
PyString_InternFromString(cp)
	const char *cp;
{
	PyObject *result = PyString_FromString(cp);

	if (result != NULL)
		PyString_InternInPlace(&result);
	return result;
}
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment