How can I make a unique value priority queue in Python? - python

Python has Queue.PriorityQueue, but I cannot see a way to make each value in it unique as there is no method for checking if a value already exists (like find(name) or similar). Moreover, PriorityQueue needs the priority to remain within the value, so I could not even search for my value, as I would also have to know the priority. You would use (0.5, myvalue) as value in PriorityQueue and then it would be sorted by the first element of the tuple.
The collections.deque class on the other hand does offer a function for checking if a value already exists and is even more natural in usage (without locking, but still atomic), but it does not offer a way to sort by priority.
There are some other implementations on stackoverflow that use heapq, but heapq also keeps the priority inside the value (e.g. at the first position of a tuple), so it does not seem well suited to checking whether a value already exists.
Creating a python priority Queue
https://stackoverflow.com/questions/3306179/priority-queue-problem-in-python
What is the best way of creating an atomic priority queue (i.e. one that can be used from multiple threads) with unique values?
Example what I’d like to add:
Priority: 0.2, Value: value1
Priority: 0.3, Value: value2
Priority: 0.1, Value: value3 (shall be retrieved first automatically)
Priority: 0.4, Value: value1 (shall not be added again, even though it has different priority)

You could combine a priority queue with a set:
import heapq
class PrioritySet(object):
    """Min-priority queue that silently ignores values it already holds.

    ``heap`` stores ``(priority, value)`` tuples maintained by ``heapq``;
    ``set`` mirrors the values currently queued so membership is a hash lookup.
    """

    def __init__(self):
        self.heap, self.set = [], set()

    def add(self, d, pri):
        """Queue ``d`` with priority ``pri`` unless ``d`` is already queued."""
        if d in self.set:
            return
        heapq.heappush(self.heap, (pri, d))
        self.set.add(d)

    def pop(self):
        """Remove and return the value with the smallest priority.

        Raises IndexError (from ``heapq.heappop``) when nothing is queued.
        """
        _, value = heapq.heappop(self.heap)
        self.set.remove(value)
        return value
This uses the priority queue specified in one of your linked questions. I don't know if this is what you want, but it's rather easy to add a set to any kind of queue this way.

Well here's one way to do it. I basically started from how they defined PriorityQueue in Queue.py and added a set into it to keep track of unique keys:
from Queue import PriorityQueue
import heapq
class UniquePriorityQueue(PriorityQueue):
    """PriorityQueue of ``(priority, value)`` items that rejects queued values.

    An item is ignored when its ``item[1]`` is already waiting in the queue,
    whatever priority it is offered with. The value becomes eligible again
    once it has been fetched with ``get()``.

    Only the ``_init`` / ``_put`` / ``_get`` hooks are overridden, so the
    public (locking) interface of ``PriorityQueue`` is inherited unchanged.
    """

    def _init(self, maxsize):
        PriorityQueue._init(self, maxsize)
        # Values (item[1]) currently sitting in the heap.
        self.values = set()

    def _put(self, item, heappush=heapq.heappush):
        # ``heappush`` is kept only for signature compatibility with the
        # Python 2 hook; the base implementation's own default is used so the
        # call works on both Python 2 and Python 3.
        value = item[1]
        if value in self.values:
            print('dupe %s' % (value,))
            # put() adds one to unfinished_tasks after every _put() call, even
            # when nothing was stored. Cancel that here (we are still under the
            # queue mutex) so join()/task_done() accounting stays balanced.
            self.unfinished_tasks -= 1
            return
        print('uniq %s' % (value,))
        self.values.add(value)
        PriorityQueue._put(self, item)

    def _get(self, heappop=heapq.heappop):
        # ``heappop`` is accepted for the same compatibility reason as above.
        item = PriorityQueue._get(self)
        self.values.remove(item[1])
        return item
if __name__ == '__main__':
    # Demo: 'foo' is offered twice; only its first entry is accepted.
    u = UniquePriorityQueue()
    for entry in ((0.2, 'foo'), (0.3, 'bar'), (0.1, 'baz'), (0.4, 'foo')):
        u.put(entry)
    # Drain in priority order.
    while not u.empty():
        item = u.get_nowait()
        print(item)
Boaz Yaniv beat me to the punch by a few minutes, but I figured I'd post mine too as it supports the full interface of PriorityQueue. I left some print statements uncommented, but commented out the ones I put in while debugging it. ;)

In case you want to prioritise a task later.
# Queue three distinct tasks as (priority, task) pairs.
u = UniquePriorityQueue()
u.put((0.2, 'foo'))
u.put((0.3, 'bar'))
u.put((0.1, 'baz'))
# 'foo' is already queued with 0.2; with the implementation shown next, a
# put whose priority value is not smaller than the queued one is ignored.
u.put((0.4, 'foo'))
# Now `foo`'s priority is increased.
u.put((0.05, 'foo'))
Here is another implementation that follows the official guide:
import heapq
import Queue
class UniquePriorityQueue(Queue.Queue):
    """Priority queue of unique tasks that allows raising a task's priority.

    Items are ``(priority, task)`` pairs; the smallest priority is served
    first. Putting a task that is already queued either replaces its entry
    (when the new priority is smaller) or is ignored.

    Heap entries are ``[priority, sequence, task]`` lists. The sequence number
    breaks priority ties, so two entries never fall through to comparing
    tasks, or the REMOVED sentinel, with each other.

    - https://github.com/python/cpython/blob/2.7/Lib/Queue.py
    - https://docs.python.org/3/library/heapq.html
    """

    def _init(self, maxsize):
        self.queue = []
        # Sentinel written into the task slot of a superseded heap entry.
        self.REMOVED = object()
        # task -> its live heap entry
        self.entry_finder = {}
        # Monotonic counter used as the tie-breaker inside heap entries.
        self._sequence = 0

    def _put(self, item, heappush=heapq.heappush):
        priority, task = item
        previous = self.entry_finder.get(task)
        if previous is not None:
            # put() adds one to unfinished_tasks after every _put(); the task
            # is already counted, so cancel that increment (the queue mutex is
            # held here) whether the entry is replaced or the put is ignored.
            self.unfinished_tasks -= 1
            if priority < previous[0]:
                # Lazy deletion: the stale entry stays in the heap and is
                # skipped when it surfaces in _get().
                previous[-1] = self.REMOVED
            else:
                # Do not add new item.
                return
        self._sequence += 1
        entry = [priority, self._sequence, task]
        self.entry_finder[task] = entry
        heappush(self.queue, entry)

    def _qsize(self, len=len):
        # Count live tasks only; the heap may also hold REMOVED entries.
        return len(self.entry_finder)

    def _get(self, heappop=heapq.heappop):
        """
        Return the live entry with the smallest priority as ``[priority, task]``.

        The base makes sure this shouldn't be called if `_qsize` is 0.
        """
        while self.queue:
            priority, _, task = heappop(self.queue)
            if task is not self.REMOVED:
                del self.entry_finder[task]
                return [priority, task]
        raise KeyError('It should never happen: pop from an empty priority queue')

I like @Jonny Gaines Jr.'s answer but I think it can be simplified. PriorityQueue uses a list under the hood, so you can just define:
class PrioritySetQueue(PriorityQueue):
    """PriorityQueue that drops an item equal to one already waiting.

    Uniqueness is decided on the whole item as stored, using a linear scan of
    the underlying heap list.
    """

    def _put(self, item):
        already_queued = item in self.queue
        if already_queued:
            return
        super(PrioritySetQueue, self)._put(item)

Related

Implement an LRU cache using @functools.lru_cache in Python

So I've been trying to implement an LRU cache for my project,
Using the python functools lru_cache.
As a reference I used this.
The following code is taken from the reference.
def timed_lru_cache(maxsize, seconds):
    """Build a decorator that LRU-caches a function and wipes the cache on a timer.

    Args:
        maxsize: forwarded to ``functools.lru_cache``.
        seconds: lifetime of the cache as a whole. When a call arrives after
            the lifetime has elapsed, the ENTIRE cache is cleared (entries do
            not expire individually) and a new lifetime starts.

    Returns:
        A decorator; the decorated function keeps the metadata of the
        original via ``functools.wraps``.
    """
    # Local import: the decorator below needs ``wraps`` in scope.
    from functools import wraps

    def wrapper_cache(func):
        func = lru_cache(maxsize=maxsize)(func)
        # Expiry bookkeeping lives on the lru_cache wrapper itself.
        func.lifetime = timedelta(seconds=seconds)
        func.expiration = datetime.utcnow() + func.lifetime

        @wraps(func)
        def wrapped_func(*args, **kwargs):
            if datetime.utcnow() >= func.expiration:
                func.cache_clear()
                func.expiration = datetime.utcnow() + func.lifetime
            return func(*args, **kwargs)

        return wrapped_func

    return wrapper_cache
@timed_lru_cache(maxsize=config.cache_size, seconds=config.ttl)
def load_into_cache(id):
    """Placeholder loader from the question, cached via ``timed_lru_cache``.

    Cache size and lifetime are read from ``config.cache_size`` and
    ``config.ttl``. The body is a stub: it returns the built-in ``object``
    type regardless of ``id``.
    """
    return object
In the wrapped func part, the func.cache_clear(), clears the entire cache along with all the items.
I need help removing only the elements that are past their expiry time, rather than clearing everything.
Is there any work around?
I don't think it's so easy to adapt the existing lru_cache, and I don't think that linked method is very clear.
Instead I implemented a timed lru cache from scratch. See the docstring at the top for usage.
It stores a key based on the args and kwargs of the inputs, and manages two structures:
A mapping of key => (expiry, result)
A list of recently used, where the first item is the least recently used
Every time you try to get an item, the key is looked up in the "recently used" list. If it isn't there, it gets added to the list and the mapping. If it is there, we check if the expiry is in the past. If it is, we recalculate the result, and update. Otherwise we can just return whatever is in the mapping.
from datetime import datetime, timedelta
from functools import wraps
from typing import Any, Dict, List, Optional, Tuple
class TimedLRUCache:
    """ Cache that caches results based on an expiry time, and on least recently used.

    Items are eliminated first if they expire, and then if too many "recent" items are being
    stored.

    There are two methods of using this cache, either the `get` method, or calling this as a
    decorator. The `get` method accepts any arbitrary function, but only the parameters are
    considered in the key, so it is advisable not to mix functions in one cache.

    >>> cache = TimedLRUCache(5)
    >>> def foo(i):
    ...     return i + 1
    >>> cache.get(foo, (1,), {})  # runs foo
    2
    >>> cache.get(foo, (1,), {})  # returns the previously calculated result
    2

    As a decorator is more familiar:

    >>> @TimedLRUCache(5)
    ... def foo(i):
    ...     return i + 1
    >>> foo(1)  # runs foo
    2
    >>> foo(1)  # returns the previously calculated result
    2

    Either method can allow for fine-grained control of the cache:

    >>> five_second_cache = TimedLRUCache(5)
    >>> @five_second_cache
    ... def foo(i):
    ...     return i + 1
    >>> five_second_cache.clear_cache()  # resets the cache (clear every item)
    >>> five_second_cache.prune()  # clear invalid items
    """

    # key -> (expiry, cached result)
    _items: Dict[Tuple[Any, ...], Tuple[datetime, Any]]
    # keys ordered from least to most recently used
    _recently_added: List[Tuple[Any, ...]]
    # lifetime of an entry; None means entries never expire
    delta: Optional[timedelta]
    # maximum number of entries; None (or 0) means unbounded
    max_size: Optional[int]

    def __init__(self, seconds: Optional[int] = None, max_size: Optional[int] = None):
        """
        Args:
            seconds: lifetime of each entry; a falsy value disables expiry.
            max_size: maximum number of entries kept; a falsy value disables
                size-based eviction.
        """
        self.delta = timedelta(seconds=seconds) if seconds else None
        self.max_size = max_size
        self._items = {}
        self._recently_added = []

    def __call__(self, func):
        """ Decorator form: route every call of `func` through this cache. """
        @wraps(func)
        def wrapper(*args, **kwargs):
            return self.get(func, args, kwargs)
        return wrapper

    @staticmethod
    def _get_key(args, kwargs) -> Tuple[Any, ...]:
        """ Get the thing we're going to use to lookup items in the cache.

        The argument tuple itself is the key rather than its hash(): distinct
        arguments can share a hash (in CPython hash(-1) == hash(-2)), and a
        hash-only key would hand one call the other's cached result.
        """
        return (args, tuple(sorted(kwargs.items())))

    def _update(self, key, item: Any) -> None:
        """ Store `item` under `key` as the most recently used entry, then prune. """
        if key in self._items:
            self._recently_added.remove(key)
        # the first item in the list is the least recently used
        self._recently_added.append(key)
        if self.delta is None:
            # no lifetime configured: use an expiry that is never reached
            expiry = datetime.max
        else:
            expiry = datetime.now() + self.delta
        self._items[key] = (expiry, item)
        # when this function is called, something has changed, so we can also sort out the cache
        self.prune()

    def prune(self):
        """ Clear out everything that no longer belongs in the cache

        First delete everything that has expired. Then delete everything that isn't recent (only
        if there is a `max_size`).
        """
        current_time = datetime.now()
        # collect first, delete afterwards: a dict must not change size while
        # it is being iterated
        expired = [key for key, (expiry, _) in self._items.items() if expiry < current_time]
        for key in expired:
            del self._items[key]
            self._recently_added.remove(key)
        # then make sure there aren't too many recent items
        if self.max_size:
            overflow = self._recently_added[:-self.max_size]
            for key in overflow:
                # drop the stored result too, not just the recency record
                del self._items[key]
            del self._recently_added[:len(overflow)]

    def clear_cache(self):
        """ Clear everything from the cache """
        self._items = {}
        self._recently_added = []

    def get(self, func, args, kwargs):
        """ Given a function and its arguments, get the result using the cache

        `args` is the tuple of positional arguments and `kwargs` the dict of keyword
        arguments that `func` would be called with.

        If the key built from them is in the cache and has not expired, the cached result is
        returned and the key is marked as most recently used. Otherwise `func` is called, its
        result is stored, and that result is returned.
        """
        key = self._get_key(args, kwargs)
        if key in self._items:
            expiry, item = self._items[key]
            if not expiry < datetime.now():
                # cache hit: refresh recency so eviction is least-recently-USED
                self._recently_added.remove(key)
                self._recently_added.append(key)
                return item
        # never seen before, or expired: compute and store a fresh value
        new_item = func(*args, **kwargs)
        self._update(key, new_item)
        return new_item

How to pass two arguments with one variable into a function?

I have been given a class which implements a Priority Queue , using a function to evaluate the priority.
class PriorityQueueWithFunction(PriorityQueue):
    """
    Priority queue whose push takes only the item, matching the push/pop
    signature of the Queue and the Stack classes so it can replace either.
    The priority of each item is computed by a caller-supplied function.
    """
    def __init__(self, priorityFunction):
        "priorityFunction (item) -> priority"
        # Keep the callable first, then run the super-class initializer.
        self.priorityFunction = priorityFunction
        PriorityQueue.__init__(self)

    def push(self, item):
        "Compute the item's priority with the stored function and enqueue it"
        priority = self.priorityFunction(item)
        PriorityQueue.push(self, item, priority)
I have been also , given , the priority function that I am going to initialize the class above with.
def manhattanHeuristic(position, problem, info={}):
    """Return the Manhattan distance from ``position`` to ``problem.goal``.

    Both points are indexed as ``[0]`` and ``[1]``. ``info`` is accepted but
    not used.
    """
    goal = problem.goal
    dx = abs(position[0] - goal[0])
    dy = abs(position[1] - goal[1])
    return dx + dy
The above code is given to us and we cannot change it. I must create that PriorityQueueWithFunction Class and push an element to it. The push function of my class takes on argument , the item. But my PriorityFunction takes 2.
What kind of arguments should i use to push the right elemnt into my class and also make my priorityfunction work properly ?
That's what i tried and i am getting compiling errors , manhattanHeuristic...takes 2 arguments , 1 given
# Build the frontier: a priority queue that ranks items with `heuristic`.
# push() calls that function with the pushed item as its ONLY argument.
queuingFn = PriorityQueueWithFunction(heuristic)
Frontier = queuingFn
# Build the start node that should eventually be pushed.
StartState = problem.getStartState()
StartNode = (StartState,'',0,(-1,-1))
# The attempt that fails: the item is a (state, problem) tuple, but it reaches
# the two-argument heuristic as one value, hence "takes 2 arguments, 1 given".
item = StartState , problem
Frontier.push(item)
Should I change my item's form ? Any ideas ?
You should make a new method that wraps call to manhattanHeuristic:
# for item as dict: item = {'position': POS, 'problem': PROBLEM}
def oneArgHeuristic(item):
    """Adapt manhattanHeuristic to the one-argument priority-function shape.

    ``item`` is a dict with 'position' and 'problem' keys, so the fields are
    read by subscription (attribute access would fail on a dict).
    """
    position = item['position']
    problem = item['problem']
    return manhattanHeuristic(position, problem)
# for item as tuple: item = (POS, PROBLEM)
def oneArgHeuristic(item):
    """Adapt manhattanHeuristic to the one-argument priority-function shape.

    ``item`` must unpack into exactly two values: the position and the problem.
    """
    pos, prob = item
    return manhattanHeuristic(pos, prob)
and pass it to PriorityQueueWithFunction instead of the original one

Priority queue with higher priority first in Python

I need a priority queue that gets the item with the highest priority value first. I'm currently using the PriorityQueue Class from the Queue library. However, this function only returns the items with the lowest value first. I tried some ugly solutions like (sys.maxint - priority) as the priority, but was just wondering if a more elegant solution exists.
Use a negative priority instead, no need to subtract from sys.maxint.
queue.put((-priority, item))
An item with priority -10 will be returned before items with priority -5, for example.
You can extend the Priority Queue to keep the logic unchanged:
from Queue import PriorityQueue
class DualPriorityQueue(PriorityQueue):
    """PriorityQueue that can serve either the smallest or the largest priority first.

    In max mode the priority is negated on the way in and negated back on the
    way out, so callers always see the priority they supplied.
    """

    def __init__(self, maxPQ=False, maxsize=0):
        """
        Args:
            maxPQ: when true, the largest priority is returned first.
            maxsize: forwarded to ``PriorityQueue`` (0 means unbounded).
        """
        PriorityQueue.__init__(self, maxsize)
        # Sign applied to priorities on put() and undone on get().
        self.reverse = -1 if maxPQ else 1

    def put(self, priority, data, *args, **kwargs):
        """Queue ``data`` under ``priority``.

        Extra arguments (``block``, ``timeout``) are forwarded to
        ``PriorityQueue.put``, mirroring what ``get`` already allows.
        """
        PriorityQueue.put(self, (self.reverse * priority, data), *args, **kwargs)

    def get(self, *args, **kwargs):
        """Return ``(priority, data)`` with the priority's original sign restored."""
        priority, data = PriorityQueue.get(self, *args, **kwargs)
        return self.reverse * priority, data
# Same two entries go into a min-first and a max-first queue.
minQ = DualPriorityQueue()
maxQ = DualPriorityQueue(maxPQ=True)
for q in (minQ, maxQ):
    q.put(10, 'A')
    q.put(100, 'A')
# Each queue hands back a different end of the priority range.
print("Min DQ: {}".format(minQ.get()))
print("Max DQ: {}".format(maxQ.get()))
Output:
Min DQ: (10, 'A')
Max DQ: (100, 'A')

Dictionary+Queue Data Structure with Active Removal of Old Messages

I would like to create a data structure which represents a set of queues (ideally a hash, map, or dict like lookup) where messages in the queues are being actively removed after they've reached a certain age. The ttl value would be global; messages would not need nor have individual ttl's. The resolution for the ttl doesn't need to be terribly accurate - only within a second or so.
I'm not even sure what to search for here. I could create a separate global queue that a background thread is monitoring, peeking and pulling pointers to messages off the global queue that tell it to remove items from the individual queues, but the behavior needs to go both ways. If an item gets removed from an invidual queue, it needs to remove from the global queue.
I would like for this data structure to be implemented in Python, ideally, and as always, speed is of the utmost importance (more so than memory usage). Any suggestions for where to start?
I'd start by just modeling the behavior you're looking for in a single class, expressed as simply as possible. Performance can come later on through iterative optimization, but only if necessary (you may not need it).
The class below does something roughly like what you're describing. Queues are simply lists that are named and stored in dictionary. Each message is timestamped and inserted at the front of the list (FIFO). Messages are reaped by checking the timestamp of the message at the end of the list, and popping it until it hits a message that is below the age threshold.
If you plan to access this from several threads you'll need to add some fine-grained locking to squeeze the most performance out of it. For example, the reap() method should only lock 1 queue at a time, rather than locking all queues (method-level synchronization), so you'd also need to keep a lock for each named queue.
Updated -- Now uses a global set of buckets (by timestamp, 1 second resolution) to keep track of which queues have messages from that time. This reduces the number of queues to be checked on each pass.
import time
from collections import defaultdict
class QueueMap(object):
    """Named message queues whose old messages can be reaped by age.

    ``_store`` maps a queue name to a list of ``(timestamp, message)`` pairs
    with the newest at index 0 and the oldest at the end. ``_expire`` maps a
    one-second timestamp bucket to ``{queue name: message count}`` so that
    ``reap`` only visits queues that received messages in that second.
    """

    def __init__(self):
        self._expire = defaultdict(lambda *n: defaultdict(int))
        self._store = defaultdict(list)
        # First bucket the next reap() pass has to look at.
        self._oldest_key = int(time.time())

    def get_queue(self, name):
        """Return the list backing queue ``name`` (a fresh empty list if unknown)."""
        return self._store.get(name, [])

    def pop(self, name):
        """Remove and return the oldest message of queue ``name``, or None if empty."""
        queue = self.get_queue(name)
        if queue:
            key, msg = queue.pop()
            self._expire[key][name] -= 1
            return msg
        return None

    def set(self, name, message):
        """Add ``message`` to queue ``name``, stamped with the current second."""
        key = int(time.time())
        # increment count of messages in this bucket/queue
        self._expire[key][name] += 1
        self._store[name].insert(0, (key, message))

    def reap(self, age):
        """Drop every message whose timestamp is at or before ``now - age`` seconds."""
        now = time.time()
        threshold = int(now - age)
        oldest = self._oldest_key
        # iterate over buckets we need to check
        for key in range(oldest, threshold + 1):
            # pop() with a default both removes the bucket and avoids creating
            # an empty one for seconds in which nothing was stored
            bucket = self._expire.pop(key, None)
            if not bucket:
                continue
            # for each queue with items, expire the oldest ones
            for name, count in bucket.items():
                if count <= 0:
                    continue
                queue = self.get_queue(name)
                while queue and queue[-1][0] <= threshold:
                    queue.pop()
        # set oldest_key for next pass
        self._oldest_key = threshold
Usage:
qm = QueueMap()
for name, text in (('one', 'message 1'), ('one', 'message 2'), ('two', 'message 3')):
    qm.set(name, text)
print(qm.pop('one'))
print(qm.get_queue('one'))
print(qm.get_queue('two'))
# in real use, reaping runs on a background thread that sleeps between passes
time.sleep(2)
# reap messages older than 1 second
qm.reap(1)
# queues should be empty now
print(qm.get_queue('one'))
print(qm.get_queue('two'))
Consider checking the TTLs whenever you access the queues instead of using a thread to be constantly checking. I'm not sure what you mean about the hash/map/dict (what is the key?), but how about something like this:
import time
class EmptyException(Exception):
    """Raised by TTLQueue.pop() when no unexpired message is left."""


class TTLQueue(object):
    """FIFO queue whose messages are discarded once they are older than ``TTL``.

    Expired messages are purged lazily, each time ``pop`` is called.
    """

    TTL = 60  # seconds

    def __init__(self):
        # (expiry timestamp, message) pairs, oldest first
        self._queue = []

    def push(self, msg):
        """Append ``msg``, stamped with the time at which it expires."""
        self._queue.append((time.time() + self.TTL, msg))

    def pop(self):
        """Return the oldest unexpired message.

        Raises:
            EmptyException: when every message has expired or none was pushed.
        """
        # Read the clock once so all entries are judged against one cutoff.
        now = time.time()
        self._queue = [(t, msg) for (t, msg) in self._queue if t > now]
        if not self._queue:
            raise EmptyException()
        return self._queue.pop(0)[1]
# Three independent queues kept in a plain list. This could be a dict, set or
# any other container once it is known which keys the queues are looked up by.
queues = [TTLQueue(), TTLQueue(), TTLQueue()]

A generic priority queue for Python

I need to use a priority queue in my Python code, and:
am looking for any fast implementations for priority queues
optimally, I'd like the queue to be generic (i.e. work well for any object with a specified comparison operator).
Looking around for something efficient, I came upon heapq, but:
I'm looking for something faster than heapq, which is implemented in native Python, so it's not fast.
It looks good, but seems to be specified only for integers. I suppose it works with any objects that have comparison operators, but it doesn't specify what comparison operators it needs.
Update: Re comparison in heapq, I can either use a (priority, object) as Charlie Martin suggests, or just implement __cmp__ for my object.
You can use Queue.PriorityQueue.
Recall that Python isn't strongly typed, so you can save anything you like: just make a tuple of (priority, thing) and you're set.
When using a priority queue, decrease-key is a must-have operation for many algorithms (Dijkstra's Algorithm, A*, OPTICS), I wonder why Python's built-in priority queue does not support it. None of the other answers supply a solution that supports this functionality.
A priority queue which also supports decrease-key operation is this implementation by Daniel Stutzbach worked perfectly for me with Python 3.5.
from heapdict import heapdict
hd = heapdict()
# Keys are the objects; the assigned values are their priorities.
hd["two"] = 2
hd["one"] = 1
# popitem() hands back an (object, priority) pair; per the output shown below
# it is the entry with the lowest priority.
obj = hd.popitem()
print("object:",obj[0])
print("priority:",obj[1])
# object: one
# priority: 1
I ended up implementing a wrapper for heapq, adding a dict for maintaining the queue's elements unique. The result should be quite efficient for all operators:
class PriorityQueueSet(object):
    """
    Combined priority queue and set data structure.

    Acts like a priority queue, except that its items are guaranteed to be
    unique. Provides O(1) membership test, O(log N) insertion and O(log N)
    removal of the smallest item.

    Important: the items of this data structure must be both comparable and
    hashable (i.e. must implement ``__lt__`` and ``__hash__``; on Python 2
    ``__cmp__`` also works). This is true of Python's built-in objects, but
    you should implement those methods if you want to use the data structure
    for custom objects.
    """
    def __init__(self, items=()):
        """
        Create a new PriorityQueueSet.

        Arguments:
            items (iterable): An initial item list - it can be unsorted and
                non-unique. The data structure will be created in O(N).
        """
        self.set = dict.fromkeys(items, True)
        # heapify() needs a real list; on Python 3 dict.keys() is only a view.
        self.heap = list(self.set)
        heapq.heapify(self.heap)

    def has_item(self, item):
        """Check if ``item`` exists in the queue."""
        return item in self.set

    def pop_smallest(self):
        """Remove and return the smallest item from the queue.

        Raises IndexError (from ``heapq.heappop``) when the queue is empty.
        """
        smallest = heapq.heappop(self.heap)
        del self.set[smallest]
        return smallest

    def add(self, item):
        """Add ``item`` to the queue if doesn't already exist."""
        if item not in self.set:
            self.set[item] = True
            heapq.heappush(self.heap, item)
You can use heapq for non-integer elements (tuples):
import heapq
# Heap-sort a list of (number, name) tuples and compare with list.sort().
heap = []
data = [(10,"ten"), (3,"three"), (5,"five"), (7,"seven"), (9, "nine"), (2,"two")]
for entry in data:
    heapq.heappush(heap, entry)
# Popping until the heap is empty yields the tuples in ascending order.
sorted_data = [heapq.heappop(heap) for _ in range(len(heap))]
print(sorted_data)
data.sort()
print(data == sorted_data)
This will be significantly faster than the queue.PriorityQueue option recommended in the top answer, and unlike queue.PriorityQueue, heapq won't hang forever if you try to pop from an empty heap.
I've not used it, but you could try PyHeap. It's written in C so hopefully it is fast enough for you.
Are you positive heapq/PriorityQueue won't be fast enough? It might be worth going with one of them to start, and then profiling to see if it really is your performance bottlneck.
Did you look at the "Show Source" link on the heapq page? There's an example a little less than halfway down of using a heap with a list of (int, char) tuples as a priority queue.
I am implementing a priority queue in python 3 using queue.PriorityQueue like this-
from queue import PriorityQueue
class PqElement(object):
    """Integer wrapper with inverted ordering, so a min-heap serves the largest value first."""

    def __init__(self, value: int):
        self.val = value

    def __lt__(self, other):
        """self < obj."""
        # Deliberately reversed: an element counts as "smaller" when its value
        # is larger.
        return other.val < self.val

    def __repr__(self):
        return 'PQE:{}'.format(self.val)
# Usage (`v` stands for whatever integer you want to queue)
pq = PriorityQueue()
pq.put(PqElement(v)) # Add item - O(log n)
topValue = pq.get() # Pop top item (a PqElement) - O(log n), it is a heap pop
topValue = pq.queue[0].val # Peek at the top value without removing it - O(1)
This is efficient and works for strings or any type input as well -:)
import itertools
from heapq import heappush, heappop
pq = []                         # heap of [priority, count, task] entries
entry_finder = {}               # task -> its live entry in pq
REMOVED = '<removed-task>'      # sentinel stored in place of a removed task
counter = itertools.count()     # ever-increasing tie-breaker


def add_task(task, priority=0):
    """Queue ``task``; if it is already queued, replace its priority."""
    if task in entry_finder:
        remove_task(task)
    new_entry = [priority, next(counter), task]
    entry_finder[task] = new_entry
    heappush(pq, new_entry)


def remove_task(task):
    """Flag the queued entry of ``task`` as REMOVED. Raise KeyError if not queued."""
    stale = entry_finder.pop(task)
    stale[-1] = REMOVED


def pop_task():
    """Pop and return the task with the smallest priority. Raise KeyError if empty."""
    while pq:
        task = heappop(pq)[-1]
        if task is REMOVED:
            # leftover from remove_task(); skip it
            continue
        del entry_finder[task]
        return task
    raise KeyError('pop from an empty priority queue')
Reference:
http://docs.python.org/library/heapq.html
I've got a priority queue / fibonacci heap at https://pypi.python.org/pypi/fibonacci-heap-mod
It's not fast (large constant c on delete-min, which is O(c*logn)). But find-min, insert, decrease-key and merge are all O(1) - IOW, it's lazy.
If it's too slow on CPython, you might try Pypy, Nuitka or even CPython+Numba :)
A simple implement:
since PriorityQueue is lower first.
from queue import PriorityQueue
class PriorityQueueWithKey(PriorityQueue):
    """PriorityQueue that ranks items with a key function (smallest key first).

    Items are stored as ``(key(item), item)`` tuples; ``get`` returns the bare
    item. When two items have equal keys the tuples fall back to comparing the
    items themselves.
    """

    def __init__(self, key=None, maxsize=0):
        """
        Args:
            key: one-argument callable giving an item's rank; None ranks items
                by their own value.
            maxsize: forwarded to ``PriorityQueue``.
        """
        super().__init__(maxsize)
        self.key = key

    def put(self, item, block=True, timeout=None):
        """Queue ``item``; ``block``/``timeout`` behave as in ``PriorityQueue.put``."""
        rank = item if self.key is None else self.key(item)
        super().put((rank, item), block, timeout)

    def get(self, block=True, timeout=None):
        """Return the item with the smallest key.

        ``block``/``timeout`` behave as in ``PriorityQueue.get``; accepting
        them keeps the inherited ``get_nowait`` working.
        """
        return super().get(block, timeout)[1]
# Rank integers by absolute value.
a = PriorityQueueWithKey(abs)
a.put(-4)
a.put(-3)
# The underlying heap holds (key, item) tuples; this should print
# "(3, -3) (4, -4)".
print(*a.queue)
I can either use a (priority, object) as Charlie Martin suggests, or just implement __cmp__ for my object.
If you want inserted objects to be prioritized by a specific rule, I found it very helpful to write a simple subclass of PriorityQueue which accepts a key-function. You won't have to insert (priority, object) tuples manually and the handling feels more natural.
Demo of the desired behavior:
>>> h = KeyHeap(sum)
>>> h.put([-1,1])
>>> h.put((-1,-2,-3))
>>> h.put({100})
>>> h.put([1,2,3])
>>> h.get()
(-1, -2, -3)
>>> h.get()
[-1, 1]
>>> h.get()
[1, 2, 3]
>>> h.get()
set([100])
>>> h.empty()
True
>>>
>>> k = KeyHeap(len)
>>> k.put('hello')
>>> k.put('stackoverflow')
>>> k.put('!')
>>> k.get()
'!'
>>> k.get()
'hello'
>>> k.get()
'stackoverflow'
Python 2 code
from Queue import PriorityQueue
class KeyHeap(PriorityQueue):
    """PriorityQueue that orders inserted objects by ``key(x)``, smallest first.

    Objects are stored as ``(key(x), x)`` tuples and returned bare by ``get``.
    """

    def __init__(self, key, maxsize=0):
        PriorityQueue.__init__(self, maxsize)
        self.key = key

    def put(self, x, block=True, timeout=None):
        """Insert ``x``; ``block``/``timeout`` are passed to ``PriorityQueue.put``."""
        PriorityQueue.put(self, (self.key(x), x), block, timeout)

    def get(self, block=True, timeout=None):
        """Return the object with the smallest key.

        ``block``/``timeout`` are passed to ``PriorityQueue.get``; accepting
        them keeps the inherited ``get_nowait`` working.
        """
        return PriorityQueue.get(self, block, timeout)[1]
Python 3 code
from queue import PriorityQueue
class KeyHeap(PriorityQueue):
    """PriorityQueue that orders inserted objects by ``key(x)``, smallest first.

    Objects are stored as ``(key(x), x)`` tuples and returned bare by ``get``.
    """

    def __init__(self, key, maxsize=0):
        super().__init__(maxsize)
        self.key = key

    def put(self, x, block=True, timeout=None):
        """Insert ``x``; ``block``/``timeout`` are passed to ``PriorityQueue.put``."""
        super().put((self.key(x), x), block, timeout)

    def get(self, block=True, timeout=None):
        """Return the object with the smallest key.

        ``block``/``timeout`` are passed to ``PriorityQueue.get``; accepting
        them keeps the inherited ``get_nowait`` working.
        """
        return super().get(block, timeout)[1]
Obviously, calling put will (and should!) raise an error if you try to insert an object which your key-function cannot process.
If you want to keep an entire list ordered, not just the top value, I've used some variation of this code in multiple projects, it's a drop in replacement for the standard list class with a similar api:
import bisect
class OrderedList(list):
    """Keep a list sorted as you append or extend it

    An ordered list, this sorts items from smallest to largest using key, so
    if you want MaxQueue like functionality use negative values: .pop(-1) and
    if you want MinQueue like functionality use positive values: .pop(0)

    Internally the list stores ``(key, value)`` pairs and ``_keys`` holds the
    keys alone, in the same order, so ``bisect`` can locate insert positions.
    Iteration, indexing, slicing, ``in`` and ``pop`` expose the values only.
    Operations that would break the ordering raise NotImplementedError.
    """
    def __init__(self, iterable=None, key=None):
        if key:
            # instance attribute shadows the identity ``key`` method below
            self.key = key
        self._keys = []
        super(OrderedList, self).__init__()
        if iterable is not None:
            self.extend(iterable)

    def key(self, x):
        """Default sort key: the value itself."""
        return x

    def append(self, x):
        """Insert ``x`` at its sorted position (after any equal keys)."""
        k = self.key(x)
        # https://docs.python.org/3/library/bisect.html#bisect.bisect_right
        i = bisect.bisect_right(self._keys, k)
        super(OrderedList, self).insert(i, (k, x))
        self._keys.insert(i, k)

    def extend(self, iterable):
        """Insert every element of ``iterable`` at its sorted position."""
        for x in iterable:
            self.append(x)

    def remove(self, x):
        """Remove the first occurrence of ``x``. Raise ValueError if absent."""
        k = self.key(x)
        # Remove the pair first: if ``x`` is absent this raises before
        # ``_keys`` is touched, so the two lists cannot drift apart.
        super(OrderedList, self).remove((k, x))
        self._keys.remove(k)

    def pop(self, i=-1):
        """Remove and return the value at index ``i`` (default: the largest)."""
        value = super(OrderedList, self).pop(i)[-1]
        self._keys.pop(i)
        return value

    def clear(self):
        super(OrderedList, self).clear()
        self._keys.clear()

    def __iter__(self):
        for pair in super(OrderedList, self).__iter__():
            yield pair[-1]

    def __getitem__(self, i):
        stored = super(OrderedList, self).__getitem__(i)
        if isinstance(i, slice):
            # a slice yields a list of pairs; unwrap each one
            return [pair[-1] for pair in stored]
        return stored[-1]

    def __contains__(self, x):
        # list.__contains__ would compare against the stored pairs
        return any(v is x or v == x for v in self)

    def __delitem__(self, i):
        super(OrderedList, self).__delitem__(i)
        del self._keys[i]

    def insert(self, i, x):
        raise NotImplementedError()

    def __setitem__(self, i, x):
        raise NotImplementedError()

    def reverse(self):
        raise NotImplementedError()

    def sort(self, *args, **kwargs):
        raise NotImplementedError()
It can handle tuples like (priority, value) by default but you can also customize it like this:
class Val(object):
    """Plain record pairing a ``priority`` with a ``val`` payload."""

    def __init__(self, priority, val):
        self.priority, self.val = priority, val
# Order Val objects by their priority attribute instead of by the object itself.
h = OrderedList(key=lambda x: x.priority)
h.append(Val(100, "foo"))
h.append(Val(10, "bar"))
h.append(Val(200, "che"))
# Index 0 is the smallest priority, index -1 the largest.
print(h[0].val) # "bar"
print(h[-1].val) # "che"
If you only have a single "higher priority" level rather than arbitrarily many as supported by queue.PriorityQueue, you can efficiently use a collections.deque for this by inserting normal jobs at the left .appendleft(), and inserting your higher-priority entries at the right .append()
Both queue and deque instances have threadsafe push/pop methods
Misc advantages to Deques
allows peeking arbitrary elements (indexable and iterable without popping, while queue instances can only be popped)
significantly faster than queue.PriorityQueue (see sketchy testing below)
Cautions about length limitations
setting a length will let it push elements out of either end, not just off the left, unlike queue instances, which block or raise queue.Full
any unbounded collection will eventually run your system out of memory if input rate exceeds consumption
import threading
from collections import deque as Deque
Q = Deque() # don't set a maximum length
def worker_queue_creator(q, pace=1.0):
    """Producer: feed low- and high-priority jobs into the deque ``q``.

    Low-priority jobs go in on the left, high-priority jobs on the right (the
    end the consumer pops from).

    Args:
        q: the deque to fill. Jobs are written to this argument, not to the
            module-level ``Q``.
        pace: multiplier applied to the built-in pauses (1s, 2s and 5s);
            the default keeps the original timing.
    """
    sleepE = threading.Event()  # use wait method for sleeping thread
    sleepE.wait(timeout=1 * pace)
    for index in range(3):  # start with a few jobs
        q.appendleft("low job {}".format(index))
    q.append("high job 1")  # add an important job
    for index in range(3, 3+3):  # add a few more jobs
        q.appendleft("low job {}".format(index))
    # one more important job before ending worker
    sleepE.wait(timeout=2 * pace)
    q.append("high job 2")
    # wait while the consumer worker processes these before exiting
    sleepE.wait(timeout=5 * pace)
def worker_queue_consumer(q):
    """ daemon thread which consumes queue forever

    Pops jobs from the right end of the deque ``q`` and prints each one next
    to a snapshot of the deque taken just before the pop. Never returns.
    """
    sleepE = threading.Event()  # use wait method for sleeping thread
    sleepE.wait(timeout=1)  # wait a moment to mock startup
    while True:
        try:
            pre_q_str = str(q)  # see what the Deque looks like before pop
            job = q.pop()
        except IndexError:  # Deque is empty
            # keep trying forever, but pause briefly so an empty deque does
            # not turn this loop into a CPU-burning spin
            sleepE.wait(timeout=0.05)
        else:  # successfully popped job
            print("{}: {}".format(job, pre_q_str))
            sleepE.wait(timeout=0.4)  # quickly consume jobs
# create threads to consume and display the queue
# One producer thread and one daemon consumer thread share the deque Q.
T = [
    threading.Thread(target=worker_queue_creator, args=(Q,)),
    threading.Thread(target=worker_queue_consumer, args=(Q,), daemon=True),
]
for t in T:
    t.start()
# Only the producer is joined; the consumer loops forever and, being a daemon
# thread, does not keep the process alive once the producer has finished.
T[0].join() # wait on sleep in worker_queue_creator to quit
% python3 deque_as_priorityqueue.py
high job 1: deque(['low job 5', 'low job 4', 'low job 3', 'low job 2', 'low job 1', 'low job 0', 'high job 1'])
low job 0: deque(['low job 5', 'low job 4', 'low job 3', 'low job 2', 'low job 1', 'low job 0'])
low job 1: deque(['low job 5', 'low job 4', 'low job 3', 'low job 2', 'low job 1'])
low job 2: deque(['low job 5', 'low job 4', 'low job 3', 'low job 2'])
low job 3: deque(['low job 5', 'low job 4', 'low job 3'])
high job 2: deque(['low job 5', 'low job 4', 'high job 2'])
low job 4: deque(['low job 5', 'low job 4'])
low job 5: deque(['low job 5'])
Comparison
import timeit
NUMBER = 1000
values_builder = """
low_priority_values = [(1, "low-{}".format(index)) for index in range(5000)]
high_priority_values = [(0, "high-{}".format(index)) for index in range(1000)]
"""
deque_setup = """
from collections import deque as Deque
Q = Deque()
"""
deque_logic_input = """
for item in low_priority_values:
Q.appendleft(item[1]) # index into tuples to remove priority
for item in high_priority_values:
Q.append(item[1])
"""
deque_logic_output = """
while True:
try:
v = Q.pop()
except IndexError:
break
"""
queue_setup = """
from queue import PriorityQueue
from queue import Empty
Q = PriorityQueue()
"""
queue_logic_input = """
for item in low_priority_values:
Q.put(item)
for item in high_priority_values:
Q.put(item)
"""
queue_logic_output = """
while True:
try:
v = Q.get_nowait()
except Empty:
break
"""
# abuse string catenation to build the setup blocks
# NOTE(review): timeit executes `setup` once and then runs the statement
# `number` times. For the two "output" timings the queue is filled only in
# setup, so it looks like just the first of the NUMBER runs drains a populated
# queue and the rest time an empty one - confirm before relying on the figures.
results_dict = {
    "deque input": timeit.timeit(deque_logic_input, setup=deque_setup+values_builder, number=NUMBER),
    "queue input": timeit.timeit(queue_logic_input, setup=queue_setup+values_builder, number=NUMBER),
    "deque output": timeit.timeit(deque_logic_output, setup=deque_setup+values_builder+deque_logic_input, number=NUMBER),
    "queue output": timeit.timeit(queue_logic_output, setup=queue_setup+values_builder+queue_logic_input, number=NUMBER),
}
# Print one "label: seconds" line per measurement.
for k, v in results_dict.items():
    print("{}: {}".format(k, v))
Results (6000 elements pushed and popped, timeit number=1000)
% python3 deque_priorityqueue_compare.py
deque input: 0.853059
queue input: 24.504084000000002
deque output: 0.0013576999999997952
queue output: 0.02025689999999969
While this is a fabricated example to show off deque's performance, PriorityQueue's insert time is some significant function of its length and O(log n) or worse, while a Deque is O(1), so it should be fairly representative of a real use case

Categories

Resources