Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
234fb2d5
Kaydet (Commit)
234fb2d5
authored
May 11, 2014
tarafından
Raymond Hettinger
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
Issue 21424: Apply the nlargest() optimizations to nsmallest() as well.
üst
3a17e217
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
137 additions
and
117 deletions
+137
-117
heapq.py
Lib/heapq.py
+115
-41
test_heapq.py
Lib/test/test_heapq.py
+1
-1
NEWS
Misc/NEWS
+2
-2
_heapqmodule.c
Modules/_heapqmodule.c
+19
-73
No files found.
Lib/heapq.py
Dosyayı görüntüle @
234fb2d5
...
@@ -127,7 +127,7 @@ From all times, sorting has always been a Great Art! :-)
...
@@ -127,7 +127,7 @@ From all times, sorting has always been a Great Art! :-)
__all__
=
[
'heappush'
,
'heappop'
,
'heapify'
,
'heapreplace'
,
'merge'
,
__all__
=
[
'heappush'
,
'heappop'
,
'heapify'
,
'heapreplace'
,
'merge'
,
'nlargest'
,
'nsmallest'
,
'heappushpop'
]
'nlargest'
,
'nsmallest'
,
'heappushpop'
]
from
itertools
import
islice
,
count
,
tee
,
chain
from
itertools
import
islice
,
count
def
heappush
(
heap
,
item
):
def
heappush
(
heap
,
item
):
"""Push item onto heap, maintaining the heap invariant."""
"""Push item onto heap, maintaining the heap invariant."""
...
@@ -179,12 +179,12 @@ def heapify(x):
...
@@ -179,12 +179,12 @@ def heapify(x):
for
i
in
reversed
(
range
(
n
//
2
)):
for
i
in
reversed
(
range
(
n
//
2
)):
_siftup
(
x
,
i
)
_siftup
(
x
,
i
)
def
_heap
pushpop
_max
(
heap
,
item
):
def
_heap
replace
_max
(
heap
,
item
):
"""Maxheap version of a heapp
ush followed by a heappop
."""
"""Maxheap version of a heapp
op followed by a heappush
."""
if
heap
and
item
<
heap
[
0
]:
returnitem
=
heap
[
0
]
# raises appropriate IndexError if heap is empty
item
,
heap
[
0
]
=
heap
[
0
],
item
heap
[
0
]
=
item
_siftup_max
(
heap
,
0
)
_siftup_max
(
heap
,
0
)
return
item
return
return
item
def
_heapify_max
(
x
):
def
_heapify_max
(
x
):
"""Transform list into a maxheap, in-place, in O(len(x)) time."""
"""Transform list into a maxheap, in-place, in O(len(x)) time."""
...
@@ -192,24 +192,6 @@ def _heapify_max(x):
...
@@ -192,24 +192,6 @@ def _heapify_max(x):
for
i
in
reversed
(
range
(
n
//
2
)):
for
i
in
reversed
(
range
(
n
//
2
)):
_siftup_max
(
x
,
i
)
_siftup_max
(
x
,
i
)
def
nsmallest
(
n
,
iterable
):
"""Find the n smallest elements in a dataset.
Equivalent to: sorted(iterable)[:n]
"""
if
n
<=
0
:
return
[]
it
=
iter
(
iterable
)
result
=
list
(
islice
(
it
,
n
))
if
not
result
:
return
result
_heapify_max
(
result
)
_heappushpop
=
_heappushpop_max
for
elem
in
it
:
_heappushpop
(
result
,
elem
)
result
.
sort
()
return
result
# 'heap' is a heap at all indices >= startpos, except possibly for pos. pos
# 'heap' is a heap at all indices >= startpos, except possibly for pos. pos
# is the index of a leaf with a possibly out-of-order value. Restore the
# is the index of a leaf with a possibly out-of-order value. Restore the
# heap invariant.
# heap invariant.
...
@@ -327,6 +309,10 @@ try:
...
@@ -327,6 +309,10 @@ try:
from
_heapq
import
*
from
_heapq
import
*
except
ImportError
:
except
ImportError
:
pass
pass
try
:
from
_heapq
import
_heapreplace_max
except
ImportError
:
pass
def
merge
(
*
iterables
):
def
merge
(
*
iterables
):
'''Merge multiple sorted inputs into a single sorted output.
'''Merge multiple sorted inputs into a single sorted output.
...
@@ -367,22 +353,86 @@ def merge(*iterables):
...
@@ -367,22 +353,86 @@ def merge(*iterables):
yield
v
yield
v
yield
from
next
.
__self__
yield
from
next
.
__self__
# Extend the implementations of nsmallest and nlargest to use a key= argument
_nsmallest
=
nsmallest
# Algorithm notes for nlargest() and nsmallest()
# ==============================================
#
# Makes just a single pass over the data while keeping the k most extreme values
# in a heap. Memory consumption is limited to keeping k values in a list.
#
# Measured performance for random inputs:
#
# number of comparisons
# n inputs k-extreme values (average of 5 trials) % more than min()
# ------------- ---------------- - ------------------- -----------------
# 1,000 100 3,317 133.2%
# 10,000 100 14,046 40.5%
# 100,000 100 105,749 5.7%
# 1,000,000 100 1,007,751 0.8%
# 10,000,000 100 10,009,401 0.1%
#
# Theoretical number of comparisons for k smallest of n random inputs:
#
# Step Comparisons Action
# ---- -------------------------- ---------------------------
# 1 1.66 * k heapify the first k-inputs
# 2 n - k compare remaining elements to top of heap
# 3 k * (1 + lg2(k)) * ln(n/k) replace the topmost value on the heap
# 4 k * lg2(k) - (k/2) final sort of the k most extreme values
# Combining and simplifying for a rough estimate gives:
# comparisons = n + k * (1 + log(n/k)) * (1 + log(k, 2))
#
# Computing the number of comparisons for step 3:
# -----------------------------------------------
# * For the i-th new value from the iterable, the probability of being in the
# k most extreme values is k/i. For example, the probability of the 101st
# value seen being in the 100 most extreme values is 100/101.
# * If the value is a new extreme value, the cost of inserting it into the
# heap is 1 + log(k, 2).
# * The probability times the cost gives:
# (k/i) * (1 + log(k, 2))
# * Summing across the remaining n-k elements gives:
# sum((k/i) * (1 + log(k, 2)) for i in range(k+1, n+1))
# * This reduces to:
# (H(n) - H(k)) * k * (1 + log(k, 2))
# * Where H(n) is the n-th harmonic number estimated by:
# gamma = 0.5772156649
# H(n) = log(n, e) + gamma + 1.0 / (2.0 * n)
# http://en.wikipedia.org/wiki/Harmonic_series_(mathematics)#Rate_of_divergence
# * Substituting the H(n) formula:
# comparisons = k * (1 + log(k, 2)) * (log(n/k, e) + (1/n - 1/k) / 2)
#
# Worst-case for step 3:
# ----------------------
# In the worst case, the input data is reverse sorted so that every new element
# must be inserted in the heap:
#
# comparisons = 1.66 * k + log(k, 2) * (n - k)
#
# Alternative Algorithms
# ----------------------
# Other algorithms were not used because they:
# 1) Took much more auxiliary memory,
# 2) Made multiple passes over the data.
# 3) Made more comparisons in common cases (small k, large n, semi-random input).
# See the more detailed comparison of approach at:
# http://code.activestate.com/recipes/577573-compare-algorithms-for-heapqsmallest
def
nsmallest
(
n
,
iterable
,
key
=
None
):
def
nsmallest
(
n
,
iterable
,
key
=
None
):
"""Find the n smallest elements in a dataset.
"""Find the n smallest elements in a dataset.
Equivalent to: sorted(iterable, key=key)[:n]
Equivalent to: sorted(iterable, key=key)[:n]
"""
"""
# Short-cut for n==1 is to use min() when len(iterable)>0
# Short-cut for n==1 is to use min() when len(iterable)>0
if
n
==
1
:
if
n
==
1
:
it
=
iter
(
iterable
)
it
=
iter
(
iterable
)
head
=
list
(
islice
(
it
,
1
))
sentinel
=
object
()
if
not
head
:
return
[]
if
key
is
None
:
if
key
is
None
:
return
[
min
(
chain
(
head
,
it
))]
result
=
min
(
it
,
default
=
sentinel
)
return
[
min
(
chain
(
head
,
it
),
key
=
key
)]
else
:
result
=
min
(
it
,
default
=
sentinel
,
key
=
key
)
return
[]
if
result
is
sentinel
else
[
result
]
# When n>=size, it's faster to use sorted()
# When n>=size, it's faster to use sorted()
try
:
try
:
...
@@ -395,15 +445,39 @@ def nsmallest(n, iterable, key=None):
...
@@ -395,15 +445,39 @@ def nsmallest(n, iterable, key=None):
# When key is none, use simpler decoration
# When key is none, use simpler decoration
if
key
is
None
:
if
key
is
None
:
it
=
zip
(
iterable
,
count
())
# decorate
it
=
iter
(
iterable
)
result
=
_nsmallest
(
n
,
it
)
result
=
list
(
islice
(
zip
(
it
,
count
()),
n
))
return
[
r
[
0
]
for
r
in
result
]
# undecorate
if
not
result
:
return
result
_heapify_max
(
result
)
order
=
n
top
=
result
[
0
][
0
]
_heapreplace
=
_heapreplace_max
for
elem
in
it
:
if
elem
<
top
:
_heapreplace
(
result
,
(
elem
,
order
))
top
=
result
[
0
][
0
]
order
+=
1
result
.
sort
()
return
[
r
[
0
]
for
r
in
result
]
# General case, slowest method
# General case, slowest method
in1
,
in2
=
tee
(
iterable
)
it
=
iter
(
iterable
)
it
=
zip
(
map
(
key
,
in1
),
count
(),
in2
)
# decorate
result
=
[(
key
(
elem
),
i
,
elem
)
for
i
,
elem
in
zip
(
range
(
n
),
it
)]
result
=
_nsmallest
(
n
,
it
)
if
not
result
:
return
[
r
[
2
]
for
r
in
result
]
# undecorate
return
result
_heapify_max
(
result
)
order
=
n
top
=
result
[
0
][
0
]
_heapreplace
=
_heapreplace_max
for
elem
in
it
:
k
=
key
(
elem
)
if
k
<
top
:
_heapreplace
(
result
,
(
k
,
order
,
elem
))
top
=
result
[
0
][
0
]
order
+=
1
result
.
sort
()
return
[
r
[
2
]
for
r
in
result
]
def
nlargest
(
n
,
iterable
,
key
=
None
):
def
nlargest
(
n
,
iterable
,
key
=
None
):
"""Find the n largest elements in a dataset.
"""Find the n largest elements in a dataset.
...
@@ -442,9 +516,9 @@ def nlargest(n, iterable, key=None):
...
@@ -442,9 +516,9 @@ def nlargest(n, iterable, key=None):
_heapreplace
=
heapreplace
_heapreplace
=
heapreplace
for
elem
in
it
:
for
elem
in
it
:
if
top
<
elem
:
if
top
<
elem
:
order
-=
1
_heapreplace
(
result
,
(
elem
,
order
))
_heapreplace
(
result
,
(
elem
,
order
))
top
=
result
[
0
][
0
]
top
=
result
[
0
][
0
]
order
-=
1
result
.
sort
(
reverse
=
True
)
result
.
sort
(
reverse
=
True
)
return
[
r
[
0
]
for
r
in
result
]
return
[
r
[
0
]
for
r
in
result
]
...
@@ -460,9 +534,9 @@ def nlargest(n, iterable, key=None):
...
@@ -460,9 +534,9 @@ def nlargest(n, iterable, key=None):
for
elem
in
it
:
for
elem
in
it
:
k
=
key
(
elem
)
k
=
key
(
elem
)
if
top
<
k
:
if
top
<
k
:
order
-=
1
_heapreplace
(
result
,
(
k
,
order
,
elem
))
_heapreplace
(
result
,
(
k
,
order
,
elem
))
top
=
result
[
0
][
0
]
top
=
result
[
0
][
0
]
order
-=
1
result
.
sort
(
reverse
=
True
)
result
.
sort
(
reverse
=
True
)
return
[
r
[
2
]
for
r
in
result
]
return
[
r
[
2
]
for
r
in
result
]
...
...
Lib/test/test_heapq.py
Dosyayı görüntüle @
234fb2d5
...
@@ -13,7 +13,7 @@ c_heapq = support.import_fresh_module('heapq', fresh=['_heapq'])
...
@@ -13,7 +13,7 @@ c_heapq = support.import_fresh_module('heapq', fresh=['_heapq'])
# _heapq.nlargest/nsmallest are saved in heapq._nlargest/_smallest when
# _heapq.nlargest/nsmallest are saved in heapq._nlargest/_smallest when
# _heapq is imported, so check them there
# _heapq is imported, so check them there
func_names
=
[
'heapify'
,
'heappop'
,
'heappush'
,
'heappushpop'
,
func_names
=
[
'heapify'
,
'heappop'
,
'heappush'
,
'heappushpop'
,
'heapreplace'
,
'_
nsmallest
'
]
'heapreplace'
,
'_
heapreplace_max
'
]
class
TestModules
(
TestCase
):
class
TestModules
(
TestCase
):
def
test_py_functions
(
self
):
def
test_py_functions
(
self
):
...
...
Misc/NEWS
Dosyayı görüntüle @
234fb2d5
...
@@ -84,8 +84,8 @@ Library
...
@@ -84,8 +84,8 @@ Library
- Issue #21156: importlib.abc.InspectLoader.source_to_code() is now a
- Issue #21156: importlib.abc.InspectLoader.source_to_code() is now a
staticmethod.
staticmethod.
- Issue #21424: Simplified and optimized heapq.nlargest()
to make fewer
- Issue #21424: Simplified and optimized heapq.nlargest()
and nsmallest()
tuple comparisons.
t
o make fewer t
uple comparisons.
- Issue #21396: Fix TextIOWrapper(..., write_through=True) to not force a
- Issue #21396: Fix TextIOWrapper(..., write_through=True) to not force a
flush() on the underlying binary stream. Patch by akira.
flush() on the underlying binary stream. Patch by akira.
...
...
Modules/_heapqmodule.c
Dosyayı görüntüle @
234fb2d5
...
@@ -354,88 +354,34 @@ _siftupmax(PyListObject *heap, Py_ssize_t pos)
...
@@ -354,88 +354,34 @@ _siftupmax(PyListObject *heap, Py_ssize_t pos)
}
}
static
PyObject
*
static
PyObject
*
nsmallest
(
PyObject
*
self
,
PyObject
*
args
)
_heapreplace_max
(
PyObject
*
self
,
PyObject
*
args
)
{
{
PyObject
*
heap
=
NULL
,
*
elem
,
*
iterable
,
*
los
,
*
it
,
*
oldelem
;
PyObject
*
heap
,
*
item
,
*
returnitem
;
Py_ssize_t
i
,
n
;
int
cmp
;
if
(
!
PyArg_
ParseTuple
(
args
,
"nO:nsmallest"
,
&
n
,
&
iterable
))
if
(
!
PyArg_
UnpackTuple
(
args
,
"_heapreplace_max"
,
2
,
2
,
&
heap
,
&
item
))
return
NULL
;
return
NULL
;
i
t
=
PyObject_GetIter
(
iterable
);
i
f
(
!
PyList_Check
(
heap
))
{
if
(
it
==
NULL
)
PyErr_SetString
(
PyExc_TypeError
,
"heap argument must be a list"
);
return
NULL
;
return
NULL
;
heap
=
PyList_New
(
0
);
if
(
heap
==
NULL
)
goto
fail
;
for
(
i
=
0
;
i
<
n
;
i
++
){
elem
=
PyIter_Next
(
it
);
if
(
elem
==
NULL
)
{
if
(
PyErr_Occurred
())
goto
fail
;
else
goto
sortit
;
}
if
(
PyList_Append
(
heap
,
elem
)
==
-
1
)
{
Py_DECREF
(
elem
);
goto
fail
;
}
Py_DECREF
(
elem
);
}
}
n
=
PyList_GET_SIZE
(
heap
);
if
(
n
==
0
)
goto
sortit
;
for
(
i
=
n
/
2
-
1
;
i
>=
0
;
i
--
)
if
(
_siftupmax
((
PyListObject
*
)
heap
,
i
)
==
-
1
)
goto
fail
;
los
=
PyList_GET_ITEM
(
heap
,
0
);
while
(
1
)
{
elem
=
PyIter_Next
(
it
);
if
(
elem
==
NULL
)
{
if
(
PyErr_Occurred
())
goto
fail
;
else
goto
sortit
;
}
cmp
=
PyObject_RichCompareBool
(
elem
,
los
,
Py_LT
);
if
(
cmp
==
-
1
)
{
Py_DECREF
(
elem
);
goto
fail
;
}
if
(
cmp
==
0
)
{
Py_DECREF
(
elem
);
continue
;
}
oldelem
=
PyList_GET_ITEM
(
heap
,
0
);
if
(
PyList_GET_SIZE
(
heap
)
<
1
)
{
PyList_SET_ITEM
(
heap
,
0
,
elem
);
PyErr_SetString
(
PyExc_IndexError
,
"index out of range"
);
Py_DECREF
(
oldelem
);
return
NULL
;
if
(
_siftupmax
((
PyListObject
*
)
heap
,
0
)
==
-
1
)
goto
fail
;
los
=
PyList_GET_ITEM
(
heap
,
0
);
}
}
sortit:
returnitem
=
PyList_GET_ITEM
(
heap
,
0
);
if
(
PyList_Sort
(
heap
)
==
-
1
)
Py_INCREF
(
item
);
goto
fail
;
PyList_SET_ITEM
(
heap
,
0
,
item
);
Py_DECREF
(
it
);
if
(
_siftupmax
((
PyListObject
*
)
heap
,
0
)
==
-
1
)
{
return
heap
;
Py_DECREF
(
returnitem
);
return
NULL
;
fail:
}
Py_DECREF
(
it
);
return
returnitem
;
Py_XDECREF
(
heap
);
return
NULL
;
}
}
PyDoc_STRVAR
(
nsmallest_doc
,
PyDoc_STRVAR
(
heapreplace_max_doc
,
"Maxheap variant of heapreplace"
);
"Find the n smallest elements in a dataset.
\n
\
\n
\
Equivalent to: sorted(iterable)[:n]
\n
"
);
static
PyMethodDef
heapq_methods
[]
=
{
static
PyMethodDef
heapq_methods
[]
=
{
{
"heappush"
,
(
PyCFunction
)
heappush
,
{
"heappush"
,
(
PyCFunction
)
heappush
,
...
@@ -448,8 +394,8 @@ static PyMethodDef heapq_methods[] = {
...
@@ -448,8 +394,8 @@ static PyMethodDef heapq_methods[] = {
METH_VARARGS
,
heapreplace_doc
},
METH_VARARGS
,
heapreplace_doc
},
{
"heapify"
,
(
PyCFunction
)
heapify
,
{
"heapify"
,
(
PyCFunction
)
heapify
,
METH_O
,
heapify_doc
},
METH_O
,
heapify_doc
},
{
"
nsmallest"
,
(
PyCFunction
)
nsmallest
,
{
"
_heapreplace_max"
,(
PyCFunction
)
_heapreplace_max
,
METH_VARARGS
,
nsmallest
_doc
},
METH_VARARGS
,
heapreplace_max
_doc
},
{
NULL
,
NULL
}
/* sentinel */
{
NULL
,
NULL
}
/* sentinel */
};
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment