Kaydet (Commit) 33ecffb6 authored tarafından Raymond Hettinger's avatar Raymond Hettinger

SF patch #969791: Add nlargest() and nsmallest() to heapq.

üst 7d019664
...@@ -83,6 +83,30 @@ True ...@@ -83,6 +83,30 @@ True
>>> >>>
\end{verbatim} \end{verbatim}
The module also offers two general purpose functions based on heaps.
\begin{funcdesc}{nlargest}{iterable, n}
Return a list with the \var{n} largest elements from the dataset defined
by \var{iterable}. Equivalent to: \code{sorted(iterable, reverse=True)[:n]}
\versionadded{2.4}
\end{funcdesc}
\begin{funcdesc}{nsmallest}{iterable, n}
Return a list with the \var{n} smallest elements from the dataset defined
by \var{iterable}. Equivalent to: \code{sorted(iterable)[:n]}
\versionadded{2.4}
\end{funcdesc}
Though the above functions appear symmetrical, they each have different
speed and space requirements. In particular, \function{nsmallest()}
operates on a full copy of the dataset. In contrast, \function{nlargest()}
only requires storage space for \var{n} elements.
Both functions perform best for smaller values of \var{n}. For larger
values, it is more efficient to use the \function{sorted()} function. Also,
when \code{n==1}, it is more efficient to use the built-in \function{min()}
and \function{max()} functions.
\subsection{Theory} \subsection{Theory}
......
...@@ -449,7 +449,10 @@ improved performance: \module{Queue}, \module{mutex}, \module{shlex} ...@@ -449,7 +449,10 @@ improved performance: \module{Queue}, \module{mutex}, \module{shlex}
\item The \module{heapq} module has been converted to C. The resulting \item The \module{heapq} module has been converted to C. The resulting
tenfold improvement in speed makes the module suitable for handling tenfold improvement in speed makes the module suitable for handling
high volumes of data. high volumes of data. In addition, the module has two new functions
\function{nlargest()} and \function{nsmallest()} that use heaps to
find the \var{n} largest or smallest values in a dataset without the
expense of a full sort.
\item The \module{imaplib} module now supports IMAP's THREAD command. \item The \module{imaplib} module now supports IMAP's THREAD command.
(Contributed by Yves Dionne.) (Contributed by Yves Dionne.)
......
...@@ -30,7 +30,7 @@ without surprises: heap[0] is the smallest item, and heap.sort() ...@@ -30,7 +30,7 @@ without surprises: heap[0] is the smallest item, and heap.sort()
maintains the heap invariant! maintains the heap invariant!
""" """
# Original code by Kevin O'Connor, augmented by Tim Peters # Original code by Kevin O'Connor, augmented by Tim Peters and Raymond Hettinger
__about__ = """Heap queues __about__ = """Heap queues
...@@ -126,7 +126,10 @@ Believe me, real good tape sorts were quite spectacular to watch! ...@@ -126,7 +126,10 @@ Believe me, real good tape sorts were quite spectacular to watch!
From all times, sorting has always been a Great Art! :-) From all times, sorting has always been a Great Art! :-)
""" """
__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace'] __all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'nlargest',
'nsmallest']
from itertools import islice, repeat
def heappush(heap, item): def heappush(heap, item):
"""Push item onto heap, maintaining the heap invariant.""" """Push item onto heap, maintaining the heap invariant."""
...@@ -168,6 +171,35 @@ def heapify(x): ...@@ -168,6 +171,35 @@ def heapify(x):
for i in reversed(xrange(n//2)): for i in reversed(xrange(n//2)):
_siftup(x, i) _siftup(x, i)
def nlargest(iterable, n):
    """Find the n largest elements in a dataset.

    For n >= 0, equivalent to:  sorted(iterable, reverse=True)[:n]

    Arguments:
        iterable -- any iterable of mutually comparable items; it is
                    traversed once.
        n        -- maximum number of items to return.

    Returns a new list of at most n items, ordered largest first.  A
    non-positive n yields an empty list (instead of the ValueError that
    islice() raises for a negative stop value).

    Only n items are held in memory at any time.
    """
    it = iter(iterable)         # raises TypeError early for non-iterables
    if n <= 0:
        return []
    # Seed a min-heap with the first n items; its root is the weakest
    # candidate currently kept.
    result = list(islice(it, n))
    if not result:
        return result
    heapify(result)
    _heapreplace = heapreplace  # local alias avoids a global lookup per item
    sol = result[0]             # sol --> smallest of the nlargest
    for elem in it:
        if elem <= sol:
            # Not larger than the weakest kept item: cannot be in the top n.
            continue
        _heapreplace(result, elem)
        sol = result[0]
    result.sort(reverse=True)
    return result
def nsmallest(iterable, n):
    """Find the n smallest elements in a dataset.

    For n >= 0, equivalent to:  sorted(iterable)[:n]

    Arguments:
        iterable -- any iterable of mutually comparable items.
        n        -- maximum number of items to return.

    Returns a new list of at most n items, ordered smallest first.  A
    non-positive n yields an empty list.

    The whole dataset is copied into a temporary heap, so the caller's
    data is left untouched but storage is proportional to its size.
    """
    h = list(iterable)
    heapify(h)
    # Never pop more items than the heap holds, and never a negative count.
    count = max(0, min(n, len(h)))
    # A list comprehension (rather than map) guarantees a real list on
    # every Python version, as the documentation promises.
    return [heappop(h) for _ in repeat(None, count)]
# 'heap' is a heap at all indices >= startpos, except possibly for pos. pos # 'heap' is a heap at all indices >= startpos, except possibly for pos. pos
# is the index of a leaf with a possibly out-of-order value. Restore the # is the index of a leaf with a possibly out-of-order value. Restore the
# heap invariant. # heap invariant.
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
from test.test_support import verify, vereq, verbose, TestFailed from test.test_support import verify, vereq, verbose, TestFailed
from heapq import heappush, heappop, heapify, heapreplace from heapq import heappush, heappop, heapify, heapreplace, nlargest, nsmallest
import random import random
def check_invariant(heap): def check_invariant(heap):
...@@ -84,6 +84,15 @@ def test_main(): ...@@ -84,6 +84,15 @@ def test_main():
data.sort() data.sort()
sorted = [heappop(heap) for i in range(size)] sorted = [heappop(heap) for i in range(size)]
vereq(data, sorted) vereq(data, sorted)
# 7) Check nlargest() and nsmallest()
data = [random.randrange(2000) for i in range(1000)]
copy = data[:]
copy.sort(reverse=True)
vereq(nlargest(data, 400), copy[:400])
copy.sort()
vereq(nsmallest(data, 400), copy[:400])
# Make user happy # Make user happy
if verbose: if verbose:
print "All OK" print "All OK"
......
...@@ -416,7 +416,9 @@ Library ...@@ -416,7 +416,9 @@ Library
os.path.exists(), switched to using os.lstat() directly if possible. os.path.exists(), switched to using os.lstat() directly if possible.
- bisect.py and heapq.py now have underlying C implementations - bisect.py and heapq.py now have underlying C implementations
for better performance for better performance.
- heapq.py has two new functions, nsmallest() and nlargest().
- traceback.format_exc has been added (similar to print_exc but it returns - traceback.format_exc has been added (similar to print_exc but it returns
a string). a string).
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment