import heapq, bisect
from operator import itemgetter, neg
from itertools import islice, repeat, count, imap, izip, tee
def nlargest(n, iterable, key=None, ties=False):
'''Find the n largest elements in iterable.
@param ties: If False, equivalent to heapq.nlargest(); ties are not taken
into account. If True, the returned list is guaranteed to contain all
the equal smallest elements of the top-`n`. If it is not possible to
satisfy this constraint by returning a list of size `n` exactly, then
the top-`k` elements that satisfy the constraint are returned, where
`k` is the largest integer smaller than `n`.
>>> s = [-4,3,5,7,4,-7,-4,-3]
>>> for i in xrange(1,len(s)+1):
... print i, nlargest(i,s,key=abs)
1 [7]
2 [7, -7]
3 [7, -7, 5]
4 [7, -7, 5, -4]
5 [7, -7, 5, -4, 4]
6 [7, -7, 5, -4, 4, -4]
7 [7, -7, 5, -4, 4, -4, 3]
8 [7, -7, 5, -4, 4, -4, 3, -3]
>>> for i in xrange(1,len(s)+1):
... print i, nlargest(i,s,key=abs,ties=True)
1 []
2 [7, -7]
3 [7, -7, 5]
4 [7, -7, 5]
5 [7, -7, 5]
6 [7, -7, 5, -4, 4, -4]
7 [7, -7, 5, -4, 4, -4]
8 [7, -7, 5, -4, 4, -4, 3, -3]
'''
if not ties:
return heapq.nlargest(n, iterable, key)
in1, in2 = tee(iterable)
it = izip(imap(key, in1), imap(neg, count()), in2) # decorate
result = list(islice(it, n))
if not result:
return result
heapq.heapify(result)
heappush, heappop, heapreplace = heapq.heappush, heapq.heappop, heapq.heapreplace
# smallest_key: smallest key of the nlargest
smallest_key = result[0][0]
# overflow: True if there are currently ties that don't fit in result
overflow = False
for elem in it:
elem_key = elem[0]
if elem_key < smallest_key:
continue
if not overflow:
assert len(result) == n and result[0][0] == smallest_key
if elem_key > smallest_key:
elem_key = heapreplace(result, elem)[0]
smallest_key = result[0][0]
assert elem_key <= smallest_key
# if the pending element (new or replaced) is equal to the smallest
# we've got a tie that can't fit in result: drop ties
if elem_key == smallest_key:
overflow = True
while result and result[0][0] == elem_key:
heappop(result)
else:
assert len(result) < n
if elem_key > smallest_key:
heappush(result, elem)
if len(result) == n:
# result just filled and the last element is larger
# than smallest: existing ties are invalidated
overflow = False
smallest_key = result[0][0]
result.sort(reverse=True)
return map(itemgetter(2), result) # undecorate
def nsmallest(n, iterable, key=None, ties=False):
'''Find the n smallest elements in iterable.
@param ties: If False, equivalent to heapq.nsmallest(); ties are not taken
into account. If True, the returned list is guaranteed to contain all
the equal largest elements of the top-`n`. If it is not possible to
satisfy this constraint by returning a list of size `n` exactly, then
the top-`k` elements that satisfy the constraint are returned, where
`k` is the largest integer smaller than `n`.
>>> s = [-4,3,5,7,4,-7,-4,-3]
>>> for i in xrange(1,len(s)+1):
... print i, nsmallest(i,s,key=abs)
1 [3]
2 [3, -3]
3 [3, -3, -4]
4 [3, -3, -4, 4]
5 [3, -3, -4, 4, -4]
6 [3, -3, -4, 4, -4, 5]
7 [3, -3, -4, 4, -4, 5, 7]
8 [3, -3, -4, 4, -4, 5, 7, -7]
>>> for i in xrange(1,len(s)+1):
... print i, nsmallest(i,s,key=abs,ties=True)
1 []
2 [3, -3]
3 [3, -3]
4 [3, -3]
5 [3, -3, -4, 4, -4]
6 [3, -3, -4, 4, -4, 5]
7 [3, -3, -4, 4, -4, 5]
8 [3, -3, -4, 4, -4, 5, 7, -7]
'''
if not ties:
return heapq.nsmallest(n, iterable, key)
in1, in2 = tee(iterable)
it = izip(imap(key, in1), count(), in2) # decorate
if hasattr(iterable, '__len__') and n * 10 <= len(iterable):
# For smaller values of n, the bisect method is faster than a minheap.
# It is also memory efficient, consuming only n elements of space.
result = sorted(islice(it, n))
if not result:
return []
insort = bisect.insort
pop = result.pop
# largest_key: largest key of the nsmallest
largest_key = result[-1][0]
# overflow: True if there are currently ties that don't fit in result
overflow = False
for elem in it:
elem_key = elem[0]
if elem_key > largest_key:
continue
if not overflow:
assert len(result) == n and result[-1][0] == largest_key
if elem_key < largest_key:
insort(result, elem)
# pop the largest from the result
elem_key = pop()[0]
# and update largest to the new largest
largest_key = result[-1][0]
assert elem_key >= largest_key
# if the pending element (new or popped) is equal to the largest
# we've got a tie that can't fit in result: drop ties
if elem_key == largest_key:
overflow = True
while result and result[-1][0] == elem_key:
pop()
else:
assert len(result) < n
if elem_key < largest_key:
insort(result, elem)
if len(result) == n:
# result just filled and the last element is smaller
# than largest: existing ties are invalidated
overflow = False
largest_key = result[-1][0]
else:
# An alternative approach manifests the whole iterable in memory but
# saves comparisons by heapifying all at once. Also, saves time
# over bisect.insort() which has O(n) data movement time for every
# insertion. Finding the n smallest of an m length iterable requires
# O(m) + O(n log m) comparisons.
h = list(it)
heapq.heapify(h)
result = map(heapq.heappop, repeat(h, min(n, len(h))))
if result:
largest_key = result[-1][0]
# is largest_key equal to the next smallest in heap ?
if h and h[0][0] == largest_key:
# if yes, delete all trailing ties
while result and result[-1][0] == largest_key:
del result[-1]
return map(itemgetter(2), result) # undecorate
if __name__ == '__main__':
import doctest; doctest.testmod()