dictobject.c 29.9 KB
Newer Older
1

2
/* Dictionary object implementation using a hash table */
3

4
#include "Python.h"
5 6 7


/*
8
 * MINSIZE is the minimum size of a dictionary.
9 10 11 12
 */

#define MINSIZE 4

13 14 15
/* define this out if you don't want conversion statistics on exit */
#undef SHOW_CONVERSION_COUNTS

16 17
/*
Table of irreducible polynomials to efficiently cycle through
18
GF(2^n)-{0}, 2<=n<=30.  A table size is always a power of 2.
19
*/
20
static long polys[] = {
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
	4 + 3,
	8 + 3,
	16 + 3,
	32 + 5,
	64 + 3,
	128 + 3,
	256 + 29,
	512 + 17,
	1024 + 9,
	2048 + 5,
	4096 + 83,
	8192 + 27,
	16384 + 43,
	32768 + 3,
	65536 + 45,
	131072 + 9,
	262144 + 39,
	524288 + 39,
	1048576 + 9,
	2097152 + 5,
	4194304 + 3,
	8388608 + 33,
	16777216 + 27,
	33554432 + 9,
	67108864 + 71,
	134217728 + 39,
	268435456 + 9,
	536870912 + 5,
	1073741824 + 83,
	0
51 52 53
};

/* Object used as dummy key to fill deleted entries */
54
static PyObject *dummy; /* Initialized by first call to newdictobject() */
55 56

/*
57 58 59 60 61 62 63 64 65 66 67
There are three kinds of slots in the table:

1. Unused.  me_key == me_value == NULL
   Does not hold an active (key, value) pair now and never did.  Unused can
   transition to Active upon key insertion.  This is the only case in which
   me_key is NULL, and is each slot's initial state.

2. Active.  me_key != NULL and me_key != dummy and me_value != NULL
   Holds an active (key, value) pair.  Active can transition to Dummy upon
   key deletion.  This is the only case in which me_value != NULL.

68
3. Dummy.  me_key == dummy and me_value == NULL
69 70 71 72 73
   Previously held an active (key, value) pair, but that was deleted and an
   active pair has not yet overwritten the slot.  Dummy can transition to
   Active upon key insertion.  Dummy slots cannot be made Unused again
   (cannot have me_key set to NULL), else the probe sequence in case of
   collision would have no way to know they were once active.
74 75 76 77

Note: .popitem() abuses the me_hash field of an Unused or Dummy slot to
hold a search finger.  The me_hash field of Unused or Dummy slots has no
meaning otherwise.
78 79
*/
typedef struct {
80
	long me_hash;      /* cached hash code of me_key */
81 82
	PyObject *me_key;
	PyObject *me_value;
83 84 85
#ifdef USE_CACHE_ALIGNED
	long	aligner;
#endif
86
} dictentry;
87 88

/*
89
To ensure the lookup algorithm terminates, there must be at least one Unused
90 91 92 93 94 95
slot (NULL key) in the table.
The value ma_fill is the number of non-NULL keys (sum of Active and Dummy);
ma_used is the number of non-NULL, non-dummy keys (== the number of non-NULL
values == the number of Active items).
To avoid slowing down lookups on a near-full table, we resize the table when
it is more than half filled.
96
*/
97 98
typedef struct dictobject dictobject;
struct dictobject {
99
	PyObject_HEAD
100 101 102 103
	int ma_fill;  /* # Active + # Dummy */
	int ma_used;  /* # Active */
	int ma_size;  /* total # slots in ma_table */
	int ma_poly;  /* appopriate entry from polys vector */
104
	dictentry *ma_table;
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
	dictentry *(*ma_lookup)(dictobject *mp, PyObject *key, long hash);
};

/* forward declarations */
static dictentry *
lookdict_string(dictobject *mp, PyObject *key, long hash);

#ifdef SHOW_CONVERSION_COUNTS
static long created = 0L;
static long converted = 0L;

static void
show_counts(void)
{
	fprintf(stderr, "created %ld string dicts\n", created);
	fprintf(stderr, "converted %ld to normal dicts\n", converted);
	fprintf(stderr, "%.2f%% conversion rate\n", (100.0*converted)/created);
}
#endif
124

125
PyObject *
126
PyDict_New(void)
127
{
128
	register dictobject *mp;
129
	if (dummy == NULL) { /* Auto-initialize dummy */
130
		dummy = PyString_FromString("<dummy key>");
131 132
		if (dummy == NULL)
			return NULL;
133 134 135
#ifdef SHOW_CONVERSION_COUNTS
		Py_AtExit(show_counts);
#endif
136
	}
137
	mp = PyObject_NEW(dictobject, &PyDict_Type);
138 139
	if (mp == NULL)
		return NULL;
140
	mp->ma_size = 0;
141
	mp->ma_poly = 0;
142
	mp->ma_table = NULL;
143 144
	mp->ma_fill = 0;
	mp->ma_used = 0;
145 146 147 148
	mp->ma_lookup = lookdict_string;
#ifdef SHOW_CONVERSION_COUNTS
	++created;
#endif
149
	PyObject_GC_Init(mp);
150
	return (PyObject *)mp;
151 152 153 154
}

/*
The basic lookup function used by all operations.
155
This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
156 157
Open addressing is preferred over chaining since the link overhead for
chaining would be substantial (100% with typical malloc overhead).
158
However, instead of going through the table at constant steps, we cycle
159
through the values of GF(2^n).  This avoids modulo computations, being
160
much cheaper on RISC machines, without leading to clustering.
161

162
The initial probe index is computed as hash mod the table size.
163 164
Subsequent probe indices use the values of x^i in GF(2^n)-{0} as an offset,
where x is a root. The initial offset is derived from hash, too.
165 166

All arithmetic on hash should ignore overflow.
167 168

(This version is due to Reimer Behrends, some ideas are also due to
169
Jyrki Alakuijala and Vladimir Marangozov.)
170 171 172 173

This function must never return NULL; failures are indicated by returning
a dictentry* for which the me_value field is NULL.  Exceptions are never
reported by this function, and outstanding exceptions are maintained.
174
*/
175
static dictentry *
176
lookdict(dictobject *mp, PyObject *key, register long hash)
177
{
178 179
	register int i;
	register unsigned incr;
180
	register dictentry *freeslot;
181
	register unsigned int mask = mp->ma_size-1;
182 183
	dictentry *ep0 = mp->ma_table;
	register dictentry *ep;
184 185 186 187
	register int restore_error = 0;
	register int checked_error = 0;
	register int cmp;
	PyObject *err_type, *err_value, *err_tb;
188
	/* We must come up with (i, incr) such that 0 <= i < ma_size
189 190
	   and 0 < incr < ma_size and both are a function of hash.
	   i is the initial table index and incr the initial probe offset. */
191 192
	i = (~hash) & mask;
	/* We use ~hash instead of hash, as degenerate hash functions, such
193
	   as for ints <sigh>, can have lots of leading zeros. It's not
194 195 196
	   really a performance risk, but better safe than sorry.
	   12-Dec-00 tim:  so ~hash produces lots of leading ones instead --
	   what's the gain? */
197
	ep = &ep0[i];
198
	if (ep->me_key == NULL || ep->me_key == key)
199
		return ep;
200
	if (ep->me_key == dummy)
201
		freeslot = ep;
202
	else {
203 204 205 206 207 208 209
		if (ep->me_hash == hash) {
			/* error can't have been checked yet */
			checked_error = 1;
			if (PyErr_Occurred()) {
				restore_error = 1;
				PyErr_Fetch(&err_type, &err_value, &err_tb);
			}
Guido van Rossum's avatar
Guido van Rossum committed
210 211
			cmp = PyObject_RichCompareBool(ep->me_key, key, Py_EQ);
			if (cmp > 0) {
212 213 214 215 216
				if (restore_error)
					PyErr_Restore(err_type, err_value,
						      err_tb);
				return ep;
			}
Guido van Rossum's avatar
Guido van Rossum committed
217 218
			else if (cmp < 0)
				PyErr_Clear();
219 220
		}
		freeslot = NULL;
Guido van Rossum's avatar
Guido van Rossum committed
221
	}
222
	/* Derive incr from hash, just to make it more arbitrary. Note that
223
	   incr must not be 0, or we will get into an infinite loop.*/
224
	incr = (hash ^ ((unsigned long)hash >> 3)) & mask;
225 226 227
	if (!incr)
		incr = mask;
	for (;;) {
228
		ep = &ep0[(i+incr)&mask];
229
		if (ep->me_key == NULL) {
230 231
			if (restore_error)
				PyErr_Restore(err_type, err_value, err_tb);
232 233 234
			if (freeslot != NULL)
				return freeslot;
			else
235
				return ep;
236 237 238
		}
		if (ep->me_key == dummy) {
			if (freeslot == NULL)
239 240
				freeslot = ep;
		}
241 242 243
		else if (ep->me_key == key) {
			if (restore_error)
				PyErr_Restore(err_type, err_value, err_tb);
244
			return ep;
245 246 247 248 249 250 251 252 253 254
                }
                else if (ep->me_hash == hash) {
			if (!checked_error) {
				checked_error = 1;
				if (PyErr_Occurred()) {
					restore_error = 1;
					PyErr_Fetch(&err_type, &err_value,
						    &err_tb);
				}
			}
Guido van Rossum's avatar
Guido van Rossum committed
255 256
			cmp = PyObject_RichCompareBool(ep->me_key, key, Py_EQ);
			if (cmp > 0) {
257 258 259 260 261
				if (restore_error)
					PyErr_Restore(err_type, err_value,
						      err_tb);
				return ep;
			}
Guido van Rossum's avatar
Guido van Rossum committed
262 263
			else if (cmp < 0)
				PyErr_Clear();
264 265 266 267
		}
		/* Cycle through GF(2^n)-{0} */
		incr = incr << 1;
		if (incr > mask)
268
			incr ^= mp->ma_poly; /* This will implicitly clear
269
						the highest bit */
270 271 272
	}
}

273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349
/*
 * Hacked up version of lookdict which can assume keys are always strings;
 * this assumption allows testing for errors during PyObject_Compare() to
 * be dropped; string-string comparisons never raise exceptions.  This also
 * means we don't need to go through PyObject_Compare(); we can always use
 * the tp_compare slot of the string type object directly.
 *
 * This really only becomes meaningful if proper error handling in lookdict()
 * is too expensive.
 */
static dictentry *
lookdict_string(dictobject *mp, PyObject *key, register long hash)
{
	register int i;
	register unsigned incr;
	register dictentry *freeslot;
	register unsigned int mask = mp->ma_size-1;
	dictentry *ep0 = mp->ma_table;
	register dictentry *ep;
        cmpfunc compare = PyString_Type.tp_compare;

	/* make sure this function doesn't have to handle non-string keys */
	if (!PyString_Check(key)) {
#ifdef SHOW_CONVERSION_COUNTS
		++converted;
#endif
		mp->ma_lookup = lookdict;
		return lookdict(mp, key, hash);
	}
	/* We must come up with (i, incr) such that 0 <= i < ma_size
	   and 0 < incr < ma_size and both are a function of hash */
	i = (~hash) & mask;
	/* We use ~hash instead of hash, as degenerate hash functions, such
	   as for ints <sigh>, can have lots of leading zeros. It's not
	   really a performance risk, but better safe than sorry. */
	ep = &ep0[i];
	if (ep->me_key == NULL || ep->me_key == key)
		return ep;
	if (ep->me_key == dummy)
		freeslot = ep;
	else {
		if (ep->me_hash == hash
                    && compare(ep->me_key, key) == 0) {
			return ep;
		}
		freeslot = NULL;
	}
	/* Derive incr from hash, just to make it more arbitrary. Note that
	   incr must not be 0, or we will get into an infinite loop.*/
	incr = (hash ^ ((unsigned long)hash >> 3)) & mask;
	if (!incr)
		incr = mask;
	for (;;) {
		ep = &ep0[(i+incr)&mask];
		if (ep->me_key == NULL) {
			if (freeslot != NULL)
				return freeslot;
			else
				return ep;
		}
		if (ep->me_key == dummy) {
			if (freeslot == NULL)
				freeslot = ep;
		}
		else if (ep->me_key == key
			 || (ep->me_hash == hash
			     && compare(ep->me_key, key) == 0)) {
			return ep;
                }
		/* Cycle through GF(2^n)-{0} */
		incr = incr << 1;
		if (incr > mask)
			incr ^= mp->ma_poly; /* This will implicitly clear
						the highest bit */
	}
}

350 351 352 353 354 355
/*
Internal routine to insert a new item into the table.
Used both by the internal resize routine and by the public insert routine.
Eats a reference to key and one to value.
*/
static void
356
insertdict(register dictobject *mp, PyObject *key, long hash, PyObject *value)
357
{
358
	PyObject *old_value;
359
	register dictentry *ep;
360
	ep = (mp->ma_lookup)(mp, key, hash);
361
	if (ep->me_value != NULL) {
362 363
		old_value = ep->me_value;
		ep->me_value = value;
364 365
		Py_DECREF(old_value); /* which **CAN** re-enter */
		Py_DECREF(key);
366 367 368 369 370
	}
	else {
		if (ep->me_key == NULL)
			mp->ma_fill++;
		else
371
			Py_DECREF(ep->me_key);
372 373
		ep->me_key = key;
		ep->me_hash = hash;
374
		ep->me_value = value;
375 376 377 378 379 380 381 382 383 384
		mp->ma_used++;
	}
}

/*
Restructure the table by allocating a new table and reinserting all
items again.  When entries have been deleted, the new table may
actually be smaller than the old one.
*/
static int
385
dictresize(dictobject *mp, int minused)
386 387
{
	register int oldsize = mp->ma_size;
388
	register int newsize, newpoly;
389 390 391
	register dictentry *oldtable = mp->ma_table;
	register dictentry *newtable;
	register dictentry *ep;
392
	register int i;
393 394 395
	for (i = 0, newsize = MINSIZE; ; i++, newsize <<= 1) {
		if (i > sizeof(polys)/sizeof(polys[0])) {
			/* Ran out of polynomials */
396
			PyErr_NoMemory();
397 398
			return -1;
		}
399
		if (newsize > minused) {
400
			newpoly = polys[i];
401 402 403
			break;
		}
	}
404
	newtable = PyMem_NEW(dictentry, newsize);
405
	if (newtable == NULL) {
406
		PyErr_NoMemory();
407 408
		return -1;
	}
409
	memset(newtable, '\0', sizeof(dictentry) * newsize);
410
	mp->ma_size = newsize;
411
	mp->ma_poly = newpoly;
412 413 414
	mp->ma_table = newtable;
	mp->ma_fill = 0;
	mp->ma_used = 0;
415 416 417

	/* Make two passes, so we can avoid decrefs
	   (and possible side effects) till the table is copied */
418 419
	for (i = 0, ep = oldtable; i < oldsize; i++, ep++) {
		if (ep->me_value != NULL)
420
			insertdict(mp,ep->me_key,ep->me_hash,ep->me_value);
421 422
	}
	for (i = 0, ep = oldtable; i < oldsize; i++, ep++) {
423
		if (ep->me_value == NULL) {
424
			Py_XDECREF(ep->me_key);
425
		}
426
	}
427

428 429
	if (oldtable != NULL)
		PyMem_DEL(oldtable);
430 431 432
	return 0;
}

433
PyObject *
434
PyDict_GetItem(PyObject *op, PyObject *key)
435 436
{
	long hash;
437
	dictobject *mp = (dictobject *)op;
438
	if (!PyDict_Check(op)) {
439 440
		return NULL;
	}
441
	if (mp->ma_table == NULL)
442
		return NULL;
443
#ifdef CACHE_HASH
444 445
	if (!PyString_Check(key) ||
	    (hash = ((PyStringObject *) key)->ob_shash) == -1)
446
#endif
447
	{
448
		hash = PyObject_Hash(key);
449 450
		if (hash == -1) {
			PyErr_Clear();
451
			return NULL;
452
		}
453
	}
454
	return (mp->ma_lookup)(mp, key, hash)->me_value;
455 456 457
}

int
458
PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
459
{
460
	register dictobject *mp;
461
	register long hash;
462 463
	if (!PyDict_Check(op)) {
		PyErr_BadInternalCall();
464 465
		return -1;
	}
466
	mp = (dictobject *)op;
467
#ifdef CACHE_HASH
468
	if (PyString_Check(key)) {
Guido van Rossum's avatar
Guido van Rossum committed
469
#ifdef INTERN_STRINGS
470 471 472
		if (((PyStringObject *)key)->ob_sinterned != NULL) {
			key = ((PyStringObject *)key)->ob_sinterned;
			hash = ((PyStringObject *)key)->ob_shash;
Guido van Rossum's avatar
Guido van Rossum committed
473 474
		}
		else
475
#endif
Guido van Rossum's avatar
Guido van Rossum committed
476
		{
477
			hash = ((PyStringObject *)key)->ob_shash;
Guido van Rossum's avatar
Guido van Rossum committed
478
			if (hash == -1)
479
				hash = PyObject_Hash(key);
Guido van Rossum's avatar
Guido van Rossum committed
480 481 482 483 484
		}
	}
	else
#endif
	{
485
		hash = PyObject_Hash(key);
Guido van Rossum's avatar
Guido van Rossum committed
486 487 488
		if (hash == -1)
			return -1;
	}
489
	/* if fill >= 2/3 size, double in size */
490
	if (mp->ma_fill*3 >= mp->ma_size*2) {
491
		if (dictresize(mp, mp->ma_used*2) != 0) {
492 493 494 495
			if (mp->ma_fill+1 > mp->ma_size)
				return -1;
		}
	}
496 497
	Py_INCREF(value);
	Py_INCREF(key);
498
	insertdict(mp, key, hash, value);
499 500 501 502
	return 0;
}

int
503
PyDict_DelItem(PyObject *op, PyObject *key)
504
{
505
	register dictobject *mp;
506
	register long hash;
507
	register dictentry *ep;
508
	PyObject *old_value, *old_key;
509

510 511
	if (!PyDict_Check(op)) {
		PyErr_BadInternalCall();
512 513
		return -1;
	}
514
#ifdef CACHE_HASH
515 516
	if (!PyString_Check(key) ||
	    (hash = ((PyStringObject *) key)->ob_shash) == -1)
517
#endif
518
	{
519
		hash = PyObject_Hash(key);
520 521 522
		if (hash == -1)
			return -1;
	}
523 524
	mp = (dictobject *)op;
	if (((dictobject *)op)->ma_table == NULL)
525
		goto empty;
526
	ep = (mp->ma_lookup)(mp, key, hash);
527
	if (ep->me_value == NULL) {
528
	empty:
529
		PyErr_SetObject(PyExc_KeyError, key);
530 531
		return -1;
	}
532
	old_key = ep->me_key;
533
	Py_INCREF(dummy);
534
	ep->me_key = dummy;
535
	old_value = ep->me_value;
536 537
	ep->me_value = NULL;
	mp->ma_used--;
538 539
	Py_DECREF(old_value);
	Py_DECREF(old_key);
540 541 542
	return 0;
}

543
void
544
PyDict_Clear(PyObject *op)
545
{
546
	int i, n;
547 548
	register dictentry *table;
	dictobject *mp;
549
	if (!PyDict_Check(op))
550
		return;
551
	mp = (dictobject *)op;
552 553 554 555 556 557 558
	table = mp->ma_table;
	if (table == NULL)
		return;
	n = mp->ma_size;
	mp->ma_size = mp->ma_used = mp->ma_fill = 0;
	mp->ma_table = NULL;
	for (i = 0; i < n; i++) {
559 560
		Py_XDECREF(table[i].me_key);
		Py_XDECREF(table[i].me_value);
561
	}
562
	PyMem_DEL(table);
563 564
}

565
int
566
PyDict_Next(PyObject *op, int *ppos, PyObject **pkey, PyObject **pvalue)
567
{
568
	int i;
569
	register dictobject *mp;
570
	if (!PyDict_Check(op))
571
		return 0;
572
	mp = (dictobject *)op;
573 574 575 576 577 578 579 580 581 582 583 584 585
	i = *ppos;
	if (i < 0)
		return 0;
	while (i < mp->ma_size && mp->ma_table[i].me_value == NULL)
		i++;
	*ppos = i+1;
	if (i >= mp->ma_size)
		return 0;
	if (pkey)
		*pkey = mp->ma_table[i].me_key;
	if (pvalue)
		*pvalue = mp->ma_table[i].me_value;
	return 1;
586 587 588 589 590
}

/* Methods */

static void
591
dict_dealloc(register dictobject *mp)
592 593
{
	register int i;
594
	register dictentry *ep;
595
	Py_TRASHCAN_SAFE_BEGIN(mp)
596
	PyObject_GC_Fini(mp);
597
	for (i = 0, ep = mp->ma_table; i < mp->ma_size; i++, ep++) {
598
		if (ep->me_key != NULL) {
599
			Py_DECREF(ep->me_key);
600 601
		}
		if (ep->me_value != NULL) {
602
			Py_DECREF(ep->me_value);
603
		}
604
	}
605 606
	if (mp->ma_table != NULL)
		PyMem_DEL(mp->ma_table);
607
	mp = (dictobject *) PyObject_AS_GC(mp);
608
	PyObject_DEL(mp);
609
	Py_TRASHCAN_SAFE_END(mp)
610 611 612
}

static int
613
dict_print(register dictobject *mp, register FILE *fp, register int flags)
614 615 616
{
	register int i;
	register int any;
617
	register dictentry *ep;
618 619 620 621 622 623 624 625 626

	i = Py_ReprEnter((PyObject*)mp);
	if (i != 0) {
		if (i < 0)
			return i;
		fprintf(fp, "{...}");
		return 0;
	}

627 628 629 630 631 632
	fprintf(fp, "{");
	any = 0;
	for (i = 0, ep = mp->ma_table; i < mp->ma_size; i++, ep++) {
		if (ep->me_value != NULL) {
			if (any++ > 0)
				fprintf(fp, ", ");
633 634
			if (PyObject_Print((PyObject *)ep->me_key, fp, 0)!=0) {
				Py_ReprLeave((PyObject*)mp);
635
				return -1;
636
			}
637
			fprintf(fp, ": ");
638 639
			if (PyObject_Print(ep->me_value, fp, 0) != 0) {
				Py_ReprLeave((PyObject*)mp);
640
				return -1;
641
			}
642 643 644
		}
	}
	fprintf(fp, "}");
645
	Py_ReprLeave((PyObject*)mp);
646 647 648
	return 0;
}

649
static PyObject *
650
dict_repr(dictobject *mp)
651
{
652 653
	auto PyObject *v;
	PyObject *sepa, *colon;
654 655
	register int i;
	register int any;
656
	register dictentry *ep;
657 658 659 660 661 662 663 664

	i = Py_ReprEnter((PyObject*)mp);
	if (i != 0) {
		if (i > 0)
			return PyString_FromString("{...}");
		return NULL;
	}

665 666 667
	v = PyString_FromString("{");
	sepa = PyString_FromString(", ");
	colon = PyString_FromString(": ");
668
	any = 0;
669
	for (i = 0, ep = mp->ma_table; i < mp->ma_size && v; i++, ep++) {
670 671
		if (ep->me_value != NULL) {
			if (any++)
672 673 674 675
				PyString_Concat(&v, sepa);
			PyString_ConcatAndDel(&v, PyObject_Repr(ep->me_key));
			PyString_Concat(&v, colon);
			PyString_ConcatAndDel(&v, PyObject_Repr(ep->me_value));
676 677
		}
	}
678
	PyString_ConcatAndDel(&v, PyString_FromString("}"));
679
	Py_ReprLeave((PyObject*)mp);
680 681
	Py_XDECREF(sepa);
	Py_XDECREF(colon);
682 683 684 685
	return v;
}

static int
686
dict_length(dictobject *mp)
687 688 689 690
{
	return mp->ma_used;
}

691
static PyObject *
692
dict_subscript(dictobject *mp, register PyObject *key)
693
{
694
	PyObject *v;
695
	long hash;
696
	if (mp->ma_table == NULL) {
697
		PyErr_SetObject(PyExc_KeyError, key);
698 699
		return NULL;
	}
700
#ifdef CACHE_HASH
701 702
	if (!PyString_Check(key) ||
	    (hash = ((PyStringObject *) key)->ob_shash) == -1)
703
#endif
704
	{
705
		hash = PyObject_Hash(key);
706 707 708
		if (hash == -1)
			return NULL;
	}
709
	v = (mp->ma_lookup)(mp, key, hash) -> me_value;
710
	if (v == NULL)
711
		PyErr_SetObject(PyExc_KeyError, key);
712
	else
713
		Py_INCREF(v);
714 715 716 717
	return v;
}

static int
718
dict_ass_sub(dictobject *mp, PyObject *v, PyObject *w)
719 720
{
	if (w == NULL)
721
		return PyDict_DelItem((PyObject *)mp, v);
722
	else
723
		return PyDict_SetItem((PyObject *)mp, v, w);
724 725
}

726 727 728 729
static PyMappingMethods dict_as_mapping = {
	(inquiry)dict_length, /*mp_length*/
	(binaryfunc)dict_subscript, /*mp_subscript*/
	(objobjargproc)dict_ass_sub, /*mp_ass_subscript*/
730 731
};

732
static PyObject *
733
dict_keys(register dictobject *mp, PyObject *args)
734
{
735
	register PyObject *v;
736
	register int i, j;
737
	if (!PyArg_NoArgs(args))
738
		return NULL;
739
	v = PyList_New(mp->ma_used);
740 741 742 743
	if (v == NULL)
		return NULL;
	for (i = 0, j = 0; i < mp->ma_size; i++) {
		if (mp->ma_table[i].me_value != NULL) {
744 745 746
			PyObject *key = mp->ma_table[i].me_key;
			Py_INCREF(key);
			PyList_SetItem(v, j, key);
747 748 749 750 751 752
			j++;
		}
	}
	return v;
}

753
static PyObject *
754
dict_values(register dictobject *mp, PyObject *args)
755
{
756
	register PyObject *v;
757
	register int i, j;
758
	if (!PyArg_NoArgs(args))
759
		return NULL;
760
	v = PyList_New(mp->ma_used);
761 762 763 764
	if (v == NULL)
		return NULL;
	for (i = 0, j = 0; i < mp->ma_size; i++) {
		if (mp->ma_table[i].me_value != NULL) {
765 766 767
			PyObject *value = mp->ma_table[i].me_value;
			Py_INCREF(value);
			PyList_SetItem(v, j, value);
768 769 770 771 772 773
			j++;
		}
	}
	return v;
}

774
static PyObject *
775
dict_items(register dictobject *mp, PyObject *args)
776
{
777
	register PyObject *v;
778
	register int i, j;
779
	if (!PyArg_NoArgs(args))
780
		return NULL;
781
	v = PyList_New(mp->ma_used);
782 783 784 785
	if (v == NULL)
		return NULL;
	for (i = 0, j = 0; i < mp->ma_size; i++) {
		if (mp->ma_table[i].me_value != NULL) {
786 787 788
			PyObject *key = mp->ma_table[i].me_key;
			PyObject *value = mp->ma_table[i].me_value;
			PyObject *item = PyTuple_New(2);
789
			if (item == NULL) {
790
				Py_DECREF(v);
791 792
				return NULL;
			}
793 794 795 796 797
			Py_INCREF(key);
			PyTuple_SetItem(item, 0, key);
			Py_INCREF(value);
			PyTuple_SetItem(item, 1, value);
			PyList_SetItem(v, j, item);
798 799 800 801 802 803
			j++;
		}
	}
	return v;
}

804
static PyObject *
805
dict_update(register dictobject *mp, PyObject *args)
806 807 808 809 810 811
{
	register int i;
	dictobject *other;
        dictentry *entry;
	if (!PyArg_Parse(args, "O!", &PyDict_Type, &other))
		return NULL;
812 813
	if (other == mp || other->ma_used == 0)
		goto done; /* a.update(a) or a.update({}); nothing to do */
814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835
	/* Do one big resize at the start, rather than incrementally
	   resizing as we insert new items.  Expect that there will be
	   no (or few) overlapping keys. */
	if ((mp->ma_fill + other->ma_used)*3 >= mp->ma_size*2) {
		if (dictresize(mp, (mp->ma_used + other->ma_used)*3/2) != 0)
			return NULL;
	}
	for (i = 0; i < other->ma_size; i++) {
		entry = &other->ma_table[i];
		if (entry->me_value != NULL) {
			Py_INCREF(entry->me_key);
			Py_INCREF(entry->me_value);
			insertdict(mp, entry->me_key, entry->me_hash,
				   entry->me_value);
		}
	}
  done:
	Py_INCREF(Py_None);
	return Py_None;
}

static PyObject *
836
dict_copy(register dictobject *mp, PyObject *args)
837
{
838 839 840 841 842 843
	if (!PyArg_Parse(args, ""))
		return NULL;
	return PyDict_Copy((PyObject*)mp);
}

PyObject *
844
PyDict_Copy(PyObject *o)
845 846
{
	register dictobject *mp;
847 848 849
	register int i;
	dictobject *copy;
        dictentry *entry;
850 851 852

	if (o == NULL || !PyDict_Check(o)) {
		PyErr_BadInternalCall();
853
		return NULL;
854 855
	}
	mp = (dictobject *)o;
856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874
	copy = (dictobject *)PyDict_New();
	if (copy == NULL)
		return NULL;
	if (mp->ma_used > 0) {
		if (dictresize(copy, mp->ma_used*3/2) != 0)
			return NULL;
		for (i = 0; i < mp->ma_size; i++) {
			entry = &mp->ma_table[i];
			if (entry->me_value != NULL) {
				Py_INCREF(entry->me_key);
				Py_INCREF(entry->me_value);
				insertdict(copy, entry->me_key, entry->me_hash,
					   entry->me_value);
			}
		}
	}
	return (PyObject *)copy;
}

875
int
876
PyDict_Size(PyObject *mp)
877
{
878 879
	if (mp == NULL || !PyDict_Check(mp)) {
		PyErr_BadInternalCall();
880
		return 0;
881
	}
882
	return ((dictobject *)mp)->ma_used;
883 884
}

885
PyObject *
886
PyDict_Keys(PyObject *mp)
887
{
888 889
	if (mp == NULL || !PyDict_Check(mp)) {
		PyErr_BadInternalCall();
890 891
		return NULL;
	}
892
	return dict_keys((dictobject *)mp, (PyObject *)NULL);
893 894
}

895
PyObject *
896
PyDict_Values(PyObject *mp)
897
{
898 899
	if (mp == NULL || !PyDict_Check(mp)) {
		PyErr_BadInternalCall();
900 901
		return NULL;
	}
902
	return dict_values((dictobject *)mp, (PyObject *)NULL);
903 904
}

905
PyObject *
906
PyDict_Items(PyObject *mp)
907
{
908 909
	if (mp == NULL || !PyDict_Check(mp)) {
		PyErr_BadInternalCall();
910 911
		return NULL;
	}
912
	return dict_items((dictobject *)mp, (PyObject *)NULL);
913 914
}

915 916 917 918
/* Subroutine which returns the smallest key in a for which b's value
   is different or absent.  The value is returned too, through the
   pval argument.  No reference counts are incremented. */

919
static PyObject *
920
characterize(dictobject *a, dictobject *b, PyObject **pval)
921
{
922
	PyObject *diff = NULL;
Guido van Rossum's avatar
Guido van Rossum committed
923
	int i, cmp;
924 925 926 927

	*pval = NULL;
	for (i = 0; i < a->ma_size; i++) {
		if (a->ma_table[i].me_value != NULL) {
928 929
			PyObject *key = a->ma_table[i].me_key;
			PyObject *aval, *bval;
Guido van Rossum's avatar
Guido van Rossum committed
930 931 932 933 934
			if (diff != NULL) {
			    cmp = PyObject_RichCompareBool(diff, key, Py_LT);
			    if (cmp < 0)
				return NULL;
			    if (cmp > 0)
935
				continue;
Guido van Rossum's avatar
Guido van Rossum committed
936
			}
937
			aval = a->ma_table[i].me_value;
938
			bval = PyDict_GetItem((PyObject *)b, key);
Guido van Rossum's avatar
Guido van Rossum committed
939 940 941 942 943 944 945 946
			if (bval == NULL)
				cmp = 0;
			else {
			    cmp = PyObject_RichCompareBool(aval, bval, Py_EQ);
			    if (cmp < 0)
				return NULL;
			}
			if (cmp == 0)
947
			{
948 949 950 951 952 953 954 955 956
				diff = key;
				*pval = aval;
			}
		}
	}
	return diff;
}

static int
957
dict_compare(dictobject *a, dictobject *b)
958
{
959
	PyObject *adiff, *bdiff, *aval, *bval;
960 961 962 963 964 965 966 967 968
	int res;

	/* Compare lengths first */
	if (a->ma_used < b->ma_used)
		return -1;	/* a is shorter */
	else if (a->ma_used > b->ma_used)
		return 1;	/* b is shorter */
	/* Same length -- check all keys */
	adiff = characterize(a, b, &aval);
Guido van Rossum's avatar
Guido van Rossum committed
969
	if (adiff == NULL && PyErr_Occurred())
970
		return -1;
971 972 973
	if (adiff == NULL)
		return 0;	/* a is a subset with the same length */
	bdiff = characterize(b, a, &bval);
Guido van Rossum's avatar
Guido van Rossum committed
974
	if (bdiff == NULL && PyErr_Occurred())
975
		return -1;
976
	/* bdiff == NULL would be impossible now */
977
	res = PyObject_Compare(adiff, bdiff);
978
	if (res == 0)
979
		res = PyObject_Compare(aval, bval);
980 981 982
	return res;
}

983
static PyObject *
984
dict_has_key(register dictobject *mp, PyObject *args)
985
{
986
	PyObject *key;
987 988
	long hash;
	register long ok;
989
	if (!PyArg_ParseTuple(args, "O:has_key", &key))
990
		return NULL;
991
#ifdef CACHE_HASH
992 993
	if (!PyString_Check(key) ||
	    (hash = ((PyStringObject *) key)->ob_shash) == -1)
994
#endif
995
	{
996
		hash = PyObject_Hash(key);
997 998 999
		if (hash == -1)
			return NULL;
	}
1000 1001
	ok = (mp->ma_size != 0
	      && (mp->ma_lookup)(mp, key, hash)->me_value != NULL);
1002
	return PyInt_FromLong(ok);
1003 1004
}

1005
static PyObject *
1006
dict_get(register dictobject *mp, PyObject *args)
1007 1008
{
	PyObject *key;
1009
	PyObject *failobj = Py_None;
1010 1011 1012
	PyObject *val = NULL;
	long hash;

1013
	if (!PyArg_ParseTuple(args, "O|O:get", &key, &failobj))
1014
		return NULL;
1015 1016
	if (mp->ma_table == NULL)
		goto finally;
1017 1018 1019 1020 1021 1022 1023 1024 1025 1026

#ifdef CACHE_HASH
	if (!PyString_Check(key) ||
	    (hash = ((PyStringObject *) key)->ob_shash) == -1)
#endif
	{
		hash = PyObject_Hash(key);
		if (hash == -1)
			return NULL;
	}
1027
	val = (mp->ma_lookup)(mp, key, hash)->me_value;
1028

1029
  finally:
1030 1031 1032 1033 1034 1035 1036
	if (val == NULL)
		val = failobj;
	Py_INCREF(val);
	return val;
}


1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058
static PyObject *
dict_setdefault(register dictobject *mp, PyObject *args)
{
	PyObject *key;
	PyObject *failobj = Py_None;
	PyObject *val = NULL;
	long hash;

	if (!PyArg_ParseTuple(args, "O|O:setdefault", &key, &failobj))
		return NULL;
	if (mp->ma_table == NULL)
		goto finally;

#ifdef CACHE_HASH
	if (!PyString_Check(key) ||
	    (hash = ((PyStringObject *) key)->ob_shash) == -1)
#endif
	{
		hash = PyObject_Hash(key);
		if (hash == -1)
			return NULL;
	}
1059
	val = (mp->ma_lookup)(mp, key, hash)->me_value;
1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071

  finally:
	if (val == NULL) {
		val = failobj;
		if (PyDict_SetItem((PyObject*)mp, key, failobj))
			val = NULL;
	}
	Py_XINCREF(val);
	return val;
}


1072
static PyObject *
1073
dict_clear(register dictobject *mp, PyObject *args)
1074
{
1075
	if (!PyArg_NoArgs(args))
1076
		return NULL;
1077 1078 1079
	PyDict_Clear((PyObject *)mp);
	Py_INCREF(Py_None);
	return Py_None;
1080 1081
}

1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133
static PyObject *
dict_popitem(dictobject *mp, PyObject *args)
{
	int i = 0;
	dictentry *ep;
	PyObject *res;

	if (!PyArg_NoArgs(args))
		return NULL;
	if (mp->ma_used == 0) {
		PyErr_SetString(PyExc_KeyError,
				"popitem(): dictionary is empty");
		return NULL;
	}
	/* Set ep to "the first" dict entry with a value.  We abuse the hash
	 * field of slot 0 to hold a search finger:
	 * If slot 0 has a value, use slot 0.
	 * Else slot 0 is being used to hold a search finger,
	 * and we use its hash value as the first index to look.
	 */
	ep = &mp->ma_table[0];
	if (ep->me_value == NULL) {
		i = (int)ep->me_hash;
		/* The hash field may be uninitialized trash, or it
		 * may be a real hash value, or it may be a legit
		 * search finger, or it may be a once-legit search
		 * finger that's out of bounds now because it
		 * wrapped around or the table shrunk -- simply
		 * make sure it's in bounds now.
		 */
		if (i >= mp->ma_size || i < 1)
			i = 1;	/* skip slot 0 */
		while ((ep = &mp->ma_table[i])->me_value == NULL) {
			i++;
			if (i >= mp->ma_size)
				i = 1;
		}
	}
	res = PyTuple_New(2);
	if (res != NULL) {
		PyTuple_SET_ITEM(res, 0, ep->me_key);
		PyTuple_SET_ITEM(res, 1, ep->me_value);
		Py_INCREF(dummy);
		ep->me_key = dummy;
		ep->me_value = NULL;
		mp->ma_used--;
		assert(mp->ma_table[0].me_value == NULL);
		mp->ma_table[0].me_hash = i + 1;  /* next place to start */
	}
	return res;
}

1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158
static int
dict_traverse(PyObject *op, visitproc visit, void *arg)
{
	int i = 0, err;
	PyObject *pk;
	PyObject *pv;

	while (PyDict_Next(op, &i, &pk, &pv)) {
		err = visit(pk, arg);
		if (err)
			return err;
		err = visit(pv, arg);
		if (err)
			return err;
	}
	return 0;
}

static int
dict_tp_clear(PyObject *op)
{
	PyDict_Clear(op);
	return 0;
}

1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190

static char has_key__doc__[] =
"D.has_key(k) -> 1 if D has a key k, else 0";

static char get__doc__[] =
"D.get(k[,d]) -> D[k] if D.has_key(k), else d.  d defaults to None.";

static char setdefault_doc__[] =
"D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if not D.has_key(k)";

static char popitem__doc__[] =
"D.popitem() -> (k, v), remove and return some (key, value) pair as a\n\
2-tuple; but raise KeyError if D is empty";

static char keys__doc__[] =
"D.keys() -> list of D's keys";

static char items__doc__[] =
"D.items() -> list of D's (key, value) pairs, as 2-tuples";

static char values__doc__[] =
"D.values() -> list of D's values";

static char update__doc__[] =
"D.update(E) -> None.  Update D from E: for k in E.keys(): D[k] = E[k]";

static char clear__doc__[] =
"D.clear() -> None.  Remove all items from D.";

static char copy__doc__[] =
"D.copy() -> a shallow copy of D";

1191
static PyMethodDef mapp_methods[] = {
1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212
	{"has_key",	(PyCFunction)dict_has_key,      METH_VARARGS,
	 has_key__doc__},
	{"get",         (PyCFunction)dict_get,          METH_VARARGS,
	 get__doc__},
	{"setdefault",  (PyCFunction)dict_setdefault,   METH_VARARGS,
	 setdefault_doc__},
	{"popitem",	(PyCFunction)dict_popitem,	METH_OLDARGS,
	 popitem__doc__},
	{"keys",	(PyCFunction)dict_keys,		METH_OLDARGS,
	keys__doc__},
	{"items",	(PyCFunction)dict_items,	METH_OLDARGS,
	 items__doc__},
	{"values",	(PyCFunction)dict_values,	METH_OLDARGS,
	 values__doc__},
	{"update",	(PyCFunction)dict_update,	METH_OLDARGS,
	 update__doc__},
	{"clear",	(PyCFunction)dict_clear,	METH_OLDARGS,
	 clear__doc__},
	{"copy",	(PyCFunction)dict_copy,		METH_OLDARGS,
	 copy__doc__},
	{NULL,		NULL}	/* sentinel */
1213 1214
};

1215
static PyObject *
1216
dict_getattr(dictobject *mp, char *name)
1217
{
1218
	return Py_FindMethod(mapp_methods, (PyObject *)mp, name);
1219 1220
}

1221 1222
PyTypeObject PyDict_Type = {
	PyObject_HEAD_INIT(&PyType_Type)
1223
	0,
1224
	"dictionary",
1225
	sizeof(dictobject) + PyGC_HEAD_SIZE,
1226
	0,
Guido van Rossum's avatar
Guido van Rossum committed
1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246
	(destructor)dict_dealloc,		/* tp_dealloc */
	(printfunc)dict_print,			/* tp_print */
	(getattrfunc)dict_getattr,		/* tp_getattr */
	0,					/* tp_setattr */
	(cmpfunc)dict_compare,			/* tp_compare */
	(reprfunc)dict_repr,			/* tp_repr */
	0,					/* tp_as_number */
	0,					/* tp_as_sequence */
	&dict_as_mapping,			/* tp_as_mapping */
	0,					/* tp_hash */
	0,					/* tp_call */
	0,					/* tp_str */
	0,					/* tp_getattro */
	0,					/* tp_setattro */
	0,					/* tp_as_buffer */
	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC,	/* tp_flags */
	0,					/* tp_doc */
	(traverseproc)dict_traverse,		/* tp_traverse */
	(inquiry)dict_tp_clear,			/* tp_clear */
	0,					/* tp_richcompare */
1247 1248
};

1249 1250
/* For backward compatibility with old dictionary interface */

1251
PyObject *
1252
PyDict_GetItemString(PyObject *v, char *key)
1253
{
1254 1255 1256 1257 1258 1259 1260
	PyObject *kv, *rv;
	kv = PyString_FromString(key);
	if (kv == NULL)
		return NULL;
	rv = PyDict_GetItem(v, kv);
	Py_DECREF(kv);
	return rv;
1261 1262 1263
}

int
1264
PyDict_SetItemString(PyObject *v, char *key, PyObject *item)
1265
{
1266 1267 1268 1269
	PyObject *kv;
	int err;
	kv = PyString_FromString(key);
	if (kv == NULL)
1270
		return -1;
1271
	PyString_InternInPlace(&kv); /* XXX Should we really? */
1272 1273 1274
	err = PyDict_SetItem(v, kv, item);
	Py_DECREF(kv);
	return err;
1275 1276 1277
}

int
1278
PyDict_DelItemString(PyObject *v, char *key)
1279
{
1280 1281 1282 1283
	PyObject *kv;
	int err;
	kv = PyString_FromString(key);
	if (kv == NULL)
1284
		return -1;
1285 1286 1287
	err = PyDict_DelItem(v, kv);
	Py_DECREF(kv);
	return err;
1288
}