Kaydet (Commit) bad1eb2f authored tarafından Raymond Hettinger's avatar Raymond Hettinger

Build-outs for Counter() class:

* Constructor and update() support keyword args (like their dict counterparts).
* The 'del' statement no longer raises KeyError for missing values.
* Add multiset operations:  __add__, __sub__, __and__, __or__.
üst e8b619c1
......@@ -189,6 +189,7 @@ For example::
>>> c = Counter() # a new, empty counter
>>> c = Counter('gallahad') # a new counter from an iterable
>>> c = Counter({'red': 4, 'blue': 2}) # a new counter from a mapping
>>> c = Counter(spam=8, eggs=1) # a new counter from keyword args
The returned object has a dictionary style interface except that it returns
a zero count for missing items (instead of raising a :exc:`KeyError` like a
......@@ -219,7 +220,7 @@ For example::
Elements are returned in arbitrary order. If an element's count has been
set to zero or a negative number, :meth:`elements` will ignore it.
>>> c = Counter({'a': 4, 'b': 2, 'd': 0, 'e': -2})
>>> c = Counter(a=4, b=2, c=0, d=-2)
>>> list(c.elements())
['a', 'a', 'a', 'a', 'b', 'b']
......@@ -244,10 +245,10 @@ For example::
.. method:: update([iterable-or-mapping])
Like :meth:`dict.update` but adds-in counts instead of replacing them.
Elements are counted from an *iterable* or added-in from another
*mapping* (or counter)::
*mapping* (or counter). Like :meth:`dict.update` but adds-in counts
instead of replacing them, and the *iterable* is expected to be a
sequence of elements, not a sequence of ``(key, value)`` pairs::
>>> c = Counter('which')
>>> c.update('witch') # add elements from another iterable
......@@ -267,6 +268,34 @@ Common patterns for working with :class:`Counter` objects::
Counter(dict(list_of_pairs)) # convert from a list of (elem, cnt) pairs
c.most_common()[:-n:-1] # n least common elements
Several multiset mathematical operations are provided for combining
:class:`Counter` objects. Multisets are like regular sets but are allowed to
contain repeated elements (with counts of one or more). Addition and
subtraction combine counters by adding or subtracting the counts of
corresponding elements. Intersection and union return the minimum and maximum
of corresponding counts::
>>> c = Counter({'a': 3, 'b': 1})
>>> d = Counter({'a': 1, 'b': 2})
>>> c + d # add two counters together: c[x] + d[x]
Counter({'a': 4, 'b': 3})
>>> c - d # subtract (keeping only positive counts)
Counter({'a': 2})
>>> c & d # intersection: min(c[x], d[x])
Counter({'a': 1, 'b': 1})
>>> c | d # union: max(c[x], d[x])
Counter({'a': 3, 'b': 2})
All four multiset operations produce only positive counts (negative and zero
results are skipped). If inputs include negative counts, addition will sum
both counts and then exclude non-positive results. The other three operations
are undefined for negative inputs::
>>> e = Counter(a=8, b=-2, c=0)
>>> e += Counter() # remove zero and negative counts
>>> e
Counter({'a': 8})
**References**:
* Wikipedia entry for `Multisets <http://en.wikipedia.org/wiki/Multiset>`_
......
......@@ -10,7 +10,7 @@ from operator import itemgetter as _itemgetter
from keyword import iskeyword as _iskeyword
import sys as _sys
import heapq as _heapq
from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
from itertools import repeat as _repeat, chain as _chain, starmap as _starmap, ifilter as _ifilter
########################################################################
### namedtuple #######################################################
......@@ -167,7 +167,7 @@ class Counter(dict):
# http://code.activestate.com/recipes/259174/
# Knuth, TAOCP Vol. II section 4.6.3
def __init__(self, iterable=None):
def __init__(self, iterable=None, **kwds):
'''Create a new, empty Counter object. And if given, count elements
from an input iterable. Or, initialize the count from another mapping
of elements to their counts.
......@@ -175,9 +175,10 @@ class Counter(dict):
>>> c = Counter() # a new, empty counter
>>> c = Counter('gallahad') # a new counter from an iterable
>>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
>>> c = Counter(a=4, b=2) # a new counter from keyword args
'''
self.update(iterable)
self.update(iterable, **kwds)
def __missing__(self, key):
'The count of elements not in the Counter is zero.'
......@@ -228,7 +229,7 @@ class Counter(dict):
raise NotImplementedError(
'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
def update(self, iterable=None):
def update(self, iterable=None, **kwds):
'''Like dict.update() but add counts instead of replacing them.
Source can be an iterable, a dictionary, or another Counter instance.
......@@ -245,10 +246,8 @@ class Counter(dict):
# replace behavior results in some of the original untouched counts
# being mixed-in with all of the other counts for a mishmash that
# doesn't have a straight-forward interpretation in most counting
# contexts. Instead, we look to Knuth for suggested operations on
# multisets and implement the union-add operation discussed in
# TAOCP Volume II section 4.6.3 exercise 19. The Wikipedia entry for
# multisets calls that operation a sum or join.
# contexts. Instead, we implement straight-addition. Both the inputs
# and outputs are allowed to contain zero and negative counts.
if iterable is not None:
if isinstance(iterable, Mapping):
......@@ -257,17 +256,101 @@ class Counter(dict):
else:
for elem in iterable:
self[elem] += 1
if kwds:
self.update(kwds)
def copy(self):
    '''Return a shallow copy of this counter.

    Like dict.copy(), but the result is built with the instance's own
    class rather than a plain dict.  Using self.__class__ (instead of a
    hard-coded Counter) means that copying an instance of a Counter
    subclass yields an instance of that same subclass.
    '''
    return self.__class__(self)
def __delitem__(self, elem):
    '''Remove elem from the counter, doing nothing if it is not present.

    Unlike dict.__delitem__(), deleting a missing key does not raise
    KeyError.
    '''
    if elem not in self:
        # Nothing stored under this key, so there is nothing to delete.
        return
    dict.__delitem__(self, elem)
def __repr__(self):
    '''Return a constructor-style string for the counter.

    An empty counter renders as 'ClassName()'.  Otherwise the items are
    listed in the order produced by most_common(), formatted like a dict
    literal inside the class-name call.
    '''
    cls_name = self.__class__.__name__
    if self:
        # Each (elem, count) pair from most_common() fills the two %r slots.
        body = ', '.join('%r: %r' % pair for pair in self.most_common())
        return '%s({%s})' % (cls_name, body)
    return '%s()' % cls_name
# Multiset-style mathematical operations discussed in:
# Knuth TAOCP Volume II section 4.6.3 exercise 19
# and at http://en.wikipedia.org/wiki/Multiset
#
# Results are undefined when inputs contain negative counts.
# Outputs guaranteed to only include positive counts.
#
# To strip negative and zero counts, add-in an empty counter:
# c += Counter()
def __add__(self, other):
    '''Return a new Counter holding the summed counts of both operands.

    Only elements whose combined count is positive appear in the result.
    Returns NotImplemented when other is not a Counter.

    >>> Counter('abbb') + Counter('bcc')
    Counter({'b': 4, 'c': 2, 'a': 1})

    '''
    if not isinstance(other, Counter):
        return NotImplemented
    summed = Counter()
    every_key = set(self) | set(other)
    for key in every_key:
        total = self[key] + other[key]
        if total <= 0:
            # Zero and negative totals are left out of the result.
            continue
        summed[key] = total
    return summed
def __sub__(self, other):
    '''Return a new Counter of self's counts reduced by other's counts.

    Only elements of self whose remaining count is positive are kept.
    Returns NotImplemented when other is not a Counter.

    >>> Counter('abbbc') - Counter('bccd')
    Counter({'b': 2, 'a': 1})

    '''
    if not isinstance(other, Counter):
        return NotImplemented
    difference = Counter()
    for key, have in self.iteritems():
        remaining = have - other[key]
        if remaining <= 0:
            # Fully cancelled (or over-subtracted) elements are dropped.
            continue
        difference[key] = remaining
    return difference
def __or__(self, other):
    '''Return the multiset union: the larger count of each element.

    For every element present in either counter, the result holds the
    maximum of the two counts, provided that maximum is positive.
    Returns NotImplemented when other is not a Counter.

    >>> Counter('abbb') | Counter('bcc')
    Counter({'b': 3, 'c': 2, 'a': 1})

    '''
    if not isinstance(other, Counter):
        return NotImplemented
    union = Counter()
    every_key = set(self) | set(other)
    for key in every_key:
        highest = max(self[key], other[key])
        if highest <= 0:
            # Non-positive maxima never appear in the result.
            continue
        union[key] = highest
    return union
def __and__(self, other):
    '''Return the multiset intersection: the smaller count of each element.

    Only elements present in both counters are considered, and only
    positive minimums are kept.  Returns NotImplemented when other is
    not a Counter.

    >>> Counter('abbb') & Counter('bcc')
    Counter({'b': 1})

    '''
    if not isinstance(other, Counter):
        return NotImplemented
    # Walk the counter with fewer entries and probe the one with more.
    if len(self) < len(other):
        larger, smaller = other, self
    else:
        larger, smaller = self, other
    common = Counter()
    for key in _ifilter(larger.__contains__, smaller):
        lowest = min(larger[key], smaller[key])
        if lowest <= 0:
            # Non-positive minimums are left out of the result.
            continue
        common[key] = lowest
    return common
if __name__ == '__main__':
......
......@@ -2,6 +2,8 @@ import unittest, doctest
from test import test_support
from collections import namedtuple, Counter, Mapping
import pickle, cPickle, copy
from random import randrange
import operator
from collections import Hashable, Iterable, Iterator
from collections import Sized, Container, Callable
from collections import Set, MutableSet
......@@ -350,6 +352,8 @@ class TestCounter(unittest.TestCase):
def test_basics(self):
c = Counter('abcaba')
self.assertEqual(c, Counter({'a':3 , 'b': 2, 'c': 1}))
self.assertEqual(c, Counter(a=3, b=2, c=1))
self.assert_(isinstance(c, dict))
self.assert_(isinstance(c, Mapping))
self.assert_(issubclass(Counter, dict))
......@@ -379,6 +383,7 @@ class TestCounter(unittest.TestCase):
c['a'] += 1 # increment an existing value
c['b'] -= 2 # sub existing value to zero
del c['c'] # remove an entry
del c['c'] # make sure that del doesn't raise KeyError
c['d'] -= 2 # sub from a missing value
c['e'] = -5 # directly assign a missing value
c['f'] += 4 # add to a missing value
......@@ -394,7 +399,8 @@ class TestCounter(unittest.TestCase):
self.assertEqual(repr(c), 'Counter()')
self.assertRaises(NotImplementedError, Counter.fromkeys, 'abc')
self.assertRaises(TypeError, hash, c)
c.update(dict(a=5, b=3, c=1))
c.update(dict(a=5, b=3))
c.update(c=1)
c.update(Counter('a' * 50 + 'b' * 30))
c.update() # test case with no args
c.__init__('a' * 500 + 'b' * 300)
......@@ -442,6 +448,43 @@ class TestCounter(unittest.TestCase):
self.assertEqual(dict(Counter(s)), dict(Counter(s).items()))
self.assertEqual(set(Counter(s)), set(s))
def test_multiset_operations(self):
    # Exercise Counter's +, -, | and & operators against plain-number and
    # plain-set reference implementations using randomly generated inputs.

    # Verify that adding a zero counter will strip zeros and negatives
    c = Counter(a=10, b=-2, c=0) + Counter()
    self.assertEqual(dict(c), dict(a=10))

    elements = 'abcd'
    for i in range(1000):
        # test random pairs of multisets
        p = Counter(dict((elem, randrange(-2,4)) for elem in elements))
        q = Counter(dict((elem, randrange(-2,4)) for elem in elements))
        for counterop, numberop, defneg in [
            (Counter.__add__, lambda x, y: x+y if x+y>0 else 0, True),
            (Counter.__sub__, lambda x, y: x-y if x-y>0 else 0, False),
            (Counter.__or__, max, False),
            (Counter.__and__, min, False),
        ]:
            result = counterop(p, q)
            for x in elements:
                # all except __add__ are undefined for negative inputs
                if defneg or (p[x] >= 0 and q[x] >= 0):
                    self.assertEqual(numberop(p[x], q[x]), result[x])
            # verify that results exclude non-positive counts; all() is
            # required because a bare generator object is always truthy
            # and would make this assertion pass unconditionally
            self.assert_(all(x>0 for x in result.values()))

    elements = 'abcdef'
    for i in range(100):
        # verify that random multisets with no repeats are exactly like sets
        p = Counter(dict((elem, randrange(0, 2)) for elem in elements))
        q = Counter(dict((elem, randrange(0, 2)) for elem in elements))
        for counterop, setop in [
            (Counter.__sub__, set.__sub__),
            (Counter.__or__, set.__or__),
            (Counter.__and__, set.__and__),
        ]:
            counter_result = counterop(p, q)
            set_result = setop(set(p.elements()), set(q.elements()))
            self.assertEqual(counter_result, dict.fromkeys(set_result, 1))
import doctest, collections
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment