Commit 6a1cd1b3, authored by Antoine Pitrou

Issue #13521: dict.setdefault() now does only one lookup for the given key,…

Issue #13521: dict.setdefault() now does only one lookup for the given key, making it "atomic" for many purposes.
Patch by Filip Gruszczyński.
parent 92904d3a
...@@ -299,6 +299,26 @@ class DictTest(unittest.TestCase): ...@@ -299,6 +299,26 @@ class DictTest(unittest.TestCase):
x.fail = True x.fail = True
self.assertRaises(Exc, d.setdefault, x, []) self.assertRaises(Exc, d.setdefault, x, [])
def test_setdefault_atomic(self):
# Issue #13521: setdefault() calls __hash__ and __eq__ only once.
class Hashed(object):
def __init__(self):
self.hash_count = 0
self.eq_count = 0
def __hash__(self):
self.hash_count += 1
return 42
def __eq__(self, other):
self.eq_count += 1
return id(self) == id(other)
hashed1 = Hashed()
y = {hashed1: 5}
hashed2 = Hashed()
y.setdefault(hashed2, [])
self.assertEqual(hashed1.hash_count, 1)
self.assertEqual(hashed2.hash_count, 1)
self.assertEqual(hashed1.eq_count + hashed2.eq_count, 1)
def test_popitem(self): def test_popitem(self):
# dict.popitem() # dict.popitem()
for copymode in -1, +1: for copymode in -1, +1:
......
...@@ -9,6 +9,9 @@ What's New in Python 2.7.3 release candidate 1? ...@@ -9,6 +9,9 @@ What's New in Python 2.7.3 release candidate 1?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #13521: dict.setdefault() now does only one lookup for the given key,
making it "atomic" for many purposes. Patch by Filip Gruszczyński.
- Issue #13020: Fix a reference leak when allocating a structsequence object - Issue #13020: Fix a reference leak when allocating a structsequence object
fails. Patch by Suman Saha. fails. Patch by Suman Saha.
......
...@@ -502,27 +502,16 @@ _PyDict_MaybeUntrack(PyObject *op) ...@@ -502,27 +502,16 @@ _PyDict_MaybeUntrack(PyObject *op)
_PyObject_GC_UNTRACK(op); _PyObject_GC_UNTRACK(op);
} }
/* /*
Internal routine to insert a new item into the table. Internal routine to insert a new item into the table when you already have the entry object.
Used both by the internal resize routine and by the public insert routine. Used by insertdict.
Eats a reference to key and one to value.
Returns -1 if an error occurred, or 0 on success.
*/ */
static int static int
insertdict(register PyDictObject *mp, PyObject *key, long hash, PyObject *value) insertdict_by_entry(register PyDictObject *mp, PyObject *key, long hash,
PyDictEntry *ep, PyObject *value)
{ {
PyObject *old_value; PyObject *old_value;
register PyDictEntry *ep;
typedef PyDictEntry *(*lookupfunc)(PyDictObject *, PyObject *, long);
assert(mp->ma_lookup != NULL);
ep = mp->ma_lookup(mp, key, hash);
if (ep == NULL) {
Py_DECREF(key);
Py_DECREF(value);
return -1;
}
MAINTAIN_TRACKING(mp, key, value); MAINTAIN_TRACKING(mp, key, value);
if (ep->me_value != NULL) { if (ep->me_value != NULL) {
old_value = ep->me_value; old_value = ep->me_value;
...@@ -545,6 +534,28 @@ insertdict(register PyDictObject *mp, PyObject *key, long hash, PyObject *value) ...@@ -545,6 +534,28 @@ insertdict(register PyDictObject *mp, PyObject *key, long hash, PyObject *value)
return 0; return 0;
} }
/*
Internal routine to insert a new item into the table.
Used both by the internal resize routine and by the public insert routine.

Looks up the entry for (key, hash) through mp->ma_lookup and passes it,
together with key and value, to insertdict_by_entry().

Eats a reference to key and one to value: if the lookup fails, both
references are released here before returning.
Returns -1 if an error occurred, or 0 on success.
*/
static int
insertdict(register PyDictObject *mp, PyObject *key, long hash, PyObject *value)
{
    register PyDictEntry *slot;

    assert(mp->ma_lookup != NULL);
    slot = mp->ma_lookup(mp, key, hash);
    if (slot != NULL)
        return insertdict_by_entry(mp, key, hash, slot, value);

    /* The lookup failed, so nothing will take over the references we
       were handed; drop them ourselves. */
    Py_DECREF(key);
    Py_DECREF(value);
    return -1;
}
/* /*
Internal routine used by dictresize() to insert an item which is Internal routine used by dictresize() to insert an item which is
known to be absent from the dict. This routine also assumes that known to be absent from the dict. This routine also assumes that
...@@ -738,6 +749,45 @@ PyDict_GetItem(PyObject *op, PyObject *key) ...@@ -738,6 +749,45 @@ PyDict_GetItem(PyObject *op, PyObject *key)
return ep->me_value; return ep->me_value;
} }
/*
Store value under key in the dict op, using the precomputed hash.

If ep is NULL the entry is located by insertdict(); otherwise ep is used
as the entry to fill and no further lookup is done here.

Returns 0 on success and -1 if the insertion failed; when the dict is
resized afterwards, the result of dictresize() is returned instead.
*/
static int
dict_set_item_by_hash_or_entry(register PyObject *op, PyObject *key,
                               long hash, PyDictEntry *ep, PyObject *value)
{
    register PyDictObject *mp = (PyDictObject *)op;
    register Py_ssize_t used_before;
    int status;

    assert(mp->ma_fill <= mp->ma_mask);  /* at least one empty slot */
    used_before = mp->ma_used;

    /* The insert helper receives its own references: insertdict() is
       documented to eat one reference to key and one to value. */
    Py_INCREF(value);
    Py_INCREF(key);
    if (ep == NULL)
        status = insertdict(mp, key, hash, value);
    else
        status = insertdict_by_entry(mp, key, hash, ep, value);
    if (status != 0)
        return -1;

    /* If we added a key, we can safely resize.  Otherwise just return!
     * If fill >= 2/3 size, adjust size.  Normally, this doubles or
     * quadruples the size, but it's also possible for the dict to shrink
     * (if ma_fill is much larger than ma_used, meaning a lot of dict
     * keys have been deleted).
     *
     * Quadrupling the size improves average dictionary sparseness
     * (reducing collisions) at the cost of some memory and iteration
     * speed (which loops over every possible entry).  It also halves
     * the number of expensive resize operations in a growing dictionary.
     *
     * Very large dictionaries (over 50K items) use doubling instead.
     * This may help applications with severe memory constraints.
     */
    if (mp->ma_used <= used_before)
        return 0;  /* no key was added */
    if (mp->ma_fill*3 < (mp->ma_mask+1)*2)
        return 0;  /* fill is still below 2/3 of the size */
    return dictresize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);
}
/* CAUTION: PyDict_SetItem() must guarantee that it won't resize the /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
* dictionary if it's merely replacing the value for an existing key. * dictionary if it's merely replacing the value for an existing key.
* This means that it's safe to loop over a dictionary with PyDict_Next() * This means that it's safe to loop over a dictionary with PyDict_Next()
...@@ -747,9 +797,7 @@ PyDict_GetItem(PyObject *op, PyObject *key) ...@@ -747,9 +797,7 @@ PyDict_GetItem(PyObject *op, PyObject *key)
int int
PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value) PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
{ {
register PyDictObject *mp;
register long hash; register long hash;
register Py_ssize_t n_used;
if (!PyDict_Check(op)) { if (!PyDict_Check(op)) {
PyErr_BadInternalCall(); PyErr_BadInternalCall();
...@@ -757,7 +805,6 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value) ...@@ -757,7 +805,6 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
} }
assert(key); assert(key);
assert(value); assert(value);
mp = (PyDictObject *)op;
if (PyString_CheckExact(key)) { if (PyString_CheckExact(key)) {
hash = ((PyStringObject *)key)->ob_shash; hash = ((PyStringObject *)key)->ob_shash;
if (hash == -1) if (hash == -1)
...@@ -768,29 +815,7 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value) ...@@ -768,29 +815,7 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
if (hash == -1) if (hash == -1)
return -1; return -1;
} }
assert(mp->ma_fill <= mp->ma_mask); /* at least one empty slot */ return dict_set_item_by_hash_or_entry(op, key, hash, NULL, value);
n_used = mp->ma_used;
Py_INCREF(value);
Py_INCREF(key);
if (insertdict(mp, key, hash, value) != 0)
return -1;
/* If we added a key, we can safely resize. Otherwise just return!
* If fill >= 2/3 size, adjust size. Normally, this doubles or
* quaduples the size, but it's also possible for the dict to shrink
* (if ma_fill is much larger than ma_used, meaning a lot of dict
* keys have been * deleted).
*
* Quadrupling the size improves average dictionary sparseness
* (reducing collisions) at the cost of some memory and iteration
* speed (which loops over every possible entry). It also halves
* the number of expensive resize operations in a growing dictionary.
*
* Very large dictionaries (over 50K items) use doubling instead.
* This may help applications with severe memory constraints.
*/
if (!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2))
return 0;
return dictresize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);
} }
int int
...@@ -1957,9 +1982,9 @@ dict_setdefault(register PyDictObject *mp, PyObject *args) ...@@ -1957,9 +1982,9 @@ dict_setdefault(register PyDictObject *mp, PyObject *args)
return NULL; return NULL;
val = ep->me_value; val = ep->me_value;
if (val == NULL) { if (val == NULL) {
val = failobj; if (dict_set_item_by_hash_or_entry((PyObject*)mp, key, hash, ep,
if (PyDict_SetItem((PyObject*)mp, key, failobj)) failobj) == 0)
val = NULL; val = failobj;
} }
Py_XINCREF(val); Py_XINCREF(val);
return val; return val;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment