Is it bad practice to set the state of future to pass arguments?
Specifically using something like future.q = q to use q in the callback
from threading import Thread
from threading import RLock
from threading import current_thread
from concurrent.futures import Future
import time
import random
class NonBlockingQueue:
def __init__(self, max_size):
self.max_size = max_size
self.q = []
self.q_waiting_puts = []
self.q_waiting_gets = []
self.lock = RLock()
def enqueue(self, item):
future = None
with self.lock:
curr_size = len(self.q)
# queue is full so create a future for a put
# request
if curr_size == self.max_size:
future = Future()
self.q_waiting_puts.append(future)
else:
self.q.append(item)
# remember to resolve a pending future for
# a get request
if len(self.q_waiting_gets) != 0:
future_get = self.q_waiting_gets.pop(0)
future_get.set_result(self.q.pop(0))
return future
def retry_enqueue(future):
print("\nCallback invoked by thread {0}".format(current_thread().getName()))
item = future.item
q = future.q
new_future = q.enqueue(item)
if new_future is not None:
new_future.item = item
new_future.q = q
new_future.add_done_callback(retry_enqueue)
else:
print("\n{0} successfully added on a retry".format(item))
### MAIN CODE
def producer_thread(q):
item = 1
while 1:
future = q.enqueue(item)
if future is not None:
future.item = item
future.q = q
future.add_done_callback(retry_enqueue)
item += 1
# slow down the producer
time.sleep(random.randint(1, 3))
It is not a good idea to pass around arguments like this.
The reason is that in future (no pun), they could just disallow setting custom attributes on the Future object, which will break your code.
Better solution is to use functools.partial or lambda to pass extra arguments to the callback.
First, accept q as an argument in the retry_enqueue function:
def retry_enqueue(future, q): # accept 'q' argument
...
Example using functools.partial:
import functools
future.add_done_callback(functools.partial(retry_enqueue, q=q))
Example using lambda:
future.add_done_callback(lambda future: retry_enqueue(future, q))
Related
I have a multiprocessing setup that handles a long running task by appending all calculated values to lst. It looks roughly like this:
from multiprocessing import Pool
from time import sleep
def fun(_):
lst = [] # list that will be returned
for i in range(200):
lst.append(i)
if not i % 10:
sleep(0.1) # 'long task', cause a KeyboardInterrupt in this time
return lst
if __name__ == '__main__':
master = []
processes = 2
for result in Pool(processes).imap_unordered(fun, range(processes)):
master.append(result)
print(master)
I want to be able to cause a KeyboardInterrupt and have the processes return the list they worked on, even if they are not done yet, as each iteration just adds a new sublist.
(My actual data looks roughly like lst = ([], [[], ...], [[], ...]), every empty list contains ints only, the actual function would return lst1, lst2, lst3)
I have tried to envelop the whole main part in try: except: like so:
try:
for result in Pool(processes).imap_unordered(fun, range(processes)):
master.append(result)
except KeyboardInterrupt:
# somehow retrieve the values here
pass
I have however not come to any possible solution this way.
How can I tell the processes it's time to exit early and return me their current result?
Edit to show the actual structure:
main.py:
from other import Other
class Something:
def __init__(self):
pass # stuff here
def spawner(self):
for result in Pool(processes=self.processes).imap_unordered(self.loop, range(self.processes)):
pass # do stuff with the data
def loop(self, _):
# setup stuff
Other(setup_stuff).start()
other.py
class Other:
def __init__(self):
pass # more stuff
def start(self):
lst1, lst2, lst3 = [], [], []
for _ in range(self.episodes):
pass # do the actual computation
return lst1, lst2, lst3
Maybe you can use multiprocessing.Queue instead of a list to return variables. Set-up one queue at the beginning and all processes will write to the queue.
At the end, read all values from the queue.
from time import sleep
from multiprocessing import Pool, Queue
q = None
def set_global_data(queue):
global q
q = queue
def fun(_):
for i in range(200):
q.put_nowait(i)
if not i % 10:
sleep(0.1) # 'long task', cause a KeyboardInterrupt in this time
# nothing is returned
if __name__ == "__main__":
master = Queue()
processes = 2
try:
with Pool(processes, set_global_data, (master,)) as p:
for result in p.imap_unordered(fun, range(processes)):
pass
except KeyboardInterrupt:
pass
while not master.empty():
v = master.get_nowait()
print(v)
EDIT: With multiple files:
main.py
from other import Other
from multiprocessing import Pool, Queue
class Something:
def __init__(self):
pass # stuff here
def spawner(self):
master = Queue()
try:
with Pool(2, Something.set_global_data, (master,)) as p:
for _ in p.imap_unordered(self.loop, range(2)):
pass
except KeyboardInterrupt:
pass
while not master.empty():
v = master.get_nowait()
print(v)
def loop(self, _):
# setup stuff
Other().start()
#staticmethod
def set_global_data(queue):
Other.q = queue
s = Something()
s.spawner()
other.py
from time import sleep
class Other:
q = None
def __init__(self):
pass # more stuff
def start(self):
for i in range(200):
Other.q.put_nowait(i)
if not i % 10:
sleep(0.1)
I have a thread object that I can't distribute across a ProcessPoolExecutor, but would like to return a future. If I already have a future, is there a way to apply to its completed value, eg, Future a -> (a -> b) -> Future b?
import concurrent.futures
import threading
def three(x):
return 2+x
if __name__ == '__main__':
trackedItem = (3, threading.Event())
pool = concurrent.futures.ProcessPoolExecutor(3)
poolJob = (q.submit(three, trackedItem[0]),trackedItem[1]) #(Future(int), Event)
*** something magic goes here ***
#Trying to transform it into Future(int,Event)
Here's a way which uses a simpler setup code, without threading.Event as that doesn't seem necessary to solve the problem. Basically, you can create future_b as a new Future() yourself, and use the add_done_callback method on future_a to set the result of future_b. Here, func_a is the computation to compute the result of future_a, and func_b is the computation to compute the result of future_b using the result of future_a.
from concurrent.futures import ProcessPoolExecutor, Future
def func_a(x):
return 2 + x
def func_b(x):
return 10 * x
if __name__ == '__main__':
pool = ProcessPoolExecutor(3)
future_a = pool.submit(func_a, 3)
future_b = Future()
future_b.set_running_or_notify_cancel()
def callback(f):
x = f.result()
y = func_b(x)
future_b.set_result(y)
future_a.add_done_callback(callback)
print(future_b.result()) # 50
If you want a helper function to do this, you can write one: map_future takes a future and a mapping function, and returns the new mapped future as required. This version handles an exception in case f.result() or func_b throws one:
def map_future(future_a, func):
future_b = Future()
future_b.set_running_or_notify_cancel()
def callback(f):
try:
x = f.result()
y = func(x)
future_b.set_result(y)
except Exception as e:
future_b.set_exception(e)
future_a.add_done_callback(callback)
return future_b
Caveats: this goes against the advice in the documentation for the Future class, which says:
Future instances are created by Executor.submit() and should not be created directly except for testing.
Also, if you have any errors which aren't subclasses of Exception in the callback, they will be "logged and ignored" according to the docs. I've chosen to only catch Exception in this code for simplicity, but you might prefer the sys.exc_info()[0] way of catching every possible thing that could be raised.
#kaya3 provided a great answer but I ran into problem when adding exception handling for it to close the pool. You can find my example cpchung_example below to see how to compose future functionally. It still remains to add exception-handling to it that I dont have a good solution yet.
For comparison, I put them all into one file:
from concurrent.futures import ProcessPoolExecutor, Future
from concurrent.futures.thread import ThreadPoolExecutor
def map_future(future_a, func):
future_b = Future()
future_b.set_running_or_notify_cancel()
def callback(f):
try:
x = f.result()
y = func(x)
future_b.set_result(y)
except Exception as e:
future_b.set_exception(e)
future_a.add_done_callback(callback)
return future_b
def func_a(x):
return 2 + x
def func_b(x):
return 3 * x
def func_c(x):
raise NameError('Hi There')
return 4 * x
def kaya3_example():
future_a = pool.submit(func_a, 3)
future_b = Future()
future_b.set_running_or_notify_cancel()
def callback(f):
x = f.result()
y = func_b(x)
future_b.set_result(y)
future_a.add_done_callback(callback)
print(future_b.result()) # 50
def exception_handling():
try:
future_a = pool.submit(func_a, 3)
future_b = map_future(future_a, func_b)
future_c = map_future(future_b, func_c)
print(future_c.result())
except Exception as e:
pool.shutdown()
pool.shutdown()
def f(x, y):
return x * y
def cpchung_example():
with ThreadPoolExecutor(max_workers=1) as executor:
a = executor.submit(f, 2, 3)
b = executor.submit(f, 4, 5)
c = executor.submit(f, a.result(), b.result())
print(c.result())
if __name__ == '__main__':
pool = ProcessPoolExecutor(3)
kaya3_example()
cpchung_example()
# exception_handling() # not working, still wip
I am trying to use a ProcessPoolExecutor, but I am getting the error "Queue objects should only be shared between processes through inheritance", but I am not using a Queue (at least not explicitly). I can't find anything that explains what I am doing wrong.
Here is some code that demonstrates the issue (not my actual code):
from concurrent.futures import ProcessPoolExecutor, as_completed
class WhyDoesntThisWork:
def __init__(self):
self.executor = ProcessPoolExecutor(4)
def execute_something(self, starting_letter):
futures = [self.executor.submit(self.something, starting_letter, d) for d in range(4)]
letter = None
for future in as_completed(futures):
letter = future.result()
print(letter)
def something(self, letter, d):
# do something pointless for the example
for x in range(d):
letter = chr(ord(letter) + 1)
if __name__ == '__main__':
WhyDoesntThisWork(). execute_something('A')
El Ruso has pointed out that making something() a staticmethod or classmethod makes the error go away. Unfortunately, my actual code needs to call other methods using self.
Can be solved without using static approach.
When using process, each process runs in an independent memory space. That's unlike when using thread, when different threads are running under the same process, using the same memory space. Thus the error doesn't occur when you use
ThreadPoolExecutor but occurs in ProcessPoolExecutor.
So when the function of the class instance is delivered into separate sub-processes, the multiprocessing mechanism pickles the function so that the function can be passed into the sub-process as an independent instance. And when the sub-process joined, the class is updated by the function instance delivered back as unpicked one.
To make it work, just add __getstate__() and __setstate__() functions to the class to guide the class how to pickle and unpickle the function. In pickling, the unnecessary fields can be excluded as shown in del self_dict['executor'].
import multiprocessing
import time
from concurrent.futures import ProcessPoolExecutor, as_completed
class GuessItWorksNow():
def __init__(self):
self.executor = ProcessPoolExecutor(4)
def __getstate__(self):
state = self.__dict__.copy()
del state['executor']
return state
def __setstate__(self, state):
self.__dict__.update(state)
def something(self, letter, d):
# do something pointless for the example
p = multiprocessing.current_process()
time.sleep(1)
for x in range(d):
letter = chr(ord(letter) + 1)
return (f'[{p.pid}] ({p.name}) ({letter})')
def execute_something(self, starting_letter):
futures = [self.executor.submit(self.something, starting_letter, d) for d in range(10)]
for future in as_completed(futures):
print(future.result())
if __name__ == '__main__':
obj = GuessItWorksNow()
obj.execute_something('A')
try this code for something
#staticmethod
def something(letter, d):
# do something pointless for the example
for x in range(d):
letter = chr(ord(letter) + 1)
or refactor to:
from concurrent.futures import ProcessPoolExecutor, as_completed
class WhyDoesntThisWork:
def something(self, letter, d):
# do something pointless for the example
for x in range(d):
letter = chr(ord(letter) + 1)
return letter
if __name__ == '__main__':
executor = ProcessPoolExecutor(4)
letter = 'A'
obj = WhyDoesntThisWork()
futures = [executor.submit(obj.something, letter, d) for d in range(4)]
for future in as_completed(futures):
print(future.result())
I've been looking into a way to directly change variables in a running module.
What I want to achieve is that a load test is being run and that I can manually adjust the call pace or whatsoever.
Below some code that I just created (not-tested e.d.), just to give you an idea.
class A():
def __init__(self):
self.value = 1
def runForever(self):
while(1):
print self.value
def setValue(self, value):
self.value = value
if __name__ == '__main__':
#Some code to create the A object and directly apply the value from an human's input
a = A()
#Some parallelism or something has to be applied.
a.runForever()
a.setValue(raw_input("New value: "))
Edit #1: Yes, I know that now I will never hit the a.setValue() :-)
Here is a multi-threaded example. This code will work with the python interpreter but not with the Python Shell of IDLE, because the raw_input function is not handled the same way.
from threading import Thread
from time import sleep
class A(Thread):
def __init__(self):
Thread.__init__(self)
self.value = 1
self.stop_flag = False
def run(self):
while not self.stop_flag:
sleep(1)
print(self.value)
def set_value(self, value):
self.value = value
def stop(self):
self.stop_flag = True
if __name__ == '__main__':
a = A()
a.start()
try:
while 1:
r = raw_input()
a.set_value(int(r))
except:
a.stop()
The pseudo code you wrote is quite similar to the way Threading / Multiprocessing works in python. You will want to start a (for example) thread that "runs forever" and then instead of modifying the internal rate value directly, you will probably just send a message through a Queue that gives the new value.
Check out this question.
Here is a demonstration of doing what you asked about. I prefer to use Queues to directly making calls on threads / processes.
import Queue # !!warning. if you use multiprocessing, use multiprocessing.Queue
import threading
import time
def main():
q = Queue.Queue()
tester = Tester(q)
tester.start()
while True:
user_input = raw_input("New period in seconds or (q)uit: ")
if user_input.lower() == 'q':
break
try:
new_speed = float(user_input)
except ValueError:
new_speed = None # ignore junk
if new_speed is not None:
q.put(new_speed)
q.put(Tester.STOP_TOKEN)
class Tester(threading.Thread):
STOP_TOKEN = '<<stop>>'
def __init__(self, q):
threading.Thread.__init__(self)
self.q = q
self.speed = 1
def run(self):
while True:
# get from the queue
try:
item = self.q.get(block=False) # don't hang
except Queue.Empty:
item = None # do nothing
if item:
# stop when requested
if item == self.STOP_TOKEN:
break # stop this thread loop
# otherwise check for a new speed
try:
self.speed = float(item)
except ValueError:
pass # whatever you like with unknown input
# do your thing
self.main_code()
def main_code(self):
time.sleep(self.speed) # or whatever you want to do
if __name__ == '__main__':
main()
How can I implement conditional lock in threaded application, for instance I haw
30 threads that are calling function and for most off the time all threads can access is simultaneous, but depending on function input there can be condition when only one thread can do that one thing. (If value for input is repeated and some thread is still working then I need lock.)
I now that there is module threading with Rlock() but I don't now how to use it in a way that i described it in first part.
Edit: The question is actually about how to prevent any two threads from running the same function with the same argument at the same time. (Thanks to David for helping me formulate my question :) )
Try this: have a lock in the module where your function is, and if the input to the function is such that locking is required, acquire the lock inside the function. Otherwise don't.
l = threading.RLock()
def fn(arg):
if arg == arg_that_needs_lock:
l.acquire()
try:
# do stuff
finally:
l.release()
else:
# do other stuff
EDIT:
As far as I can tell now, the question is actually about how to prevent any two threads from running the same function with the same argument at the same time. There's no problem with two threads running the same function with different arguments at the same time, though. The simple method to do this, if all valid arguments to the function can be dictionary keys, is to create a dictionary of arguments to locks:
import threading
dict_lock = threading.RLock()
locks = {}
def fn_dict(arg):
dict_lock.acquire()
try:
if arg not in dict:
locks[arg] = threading.RLock()
l = locks[arg]
finally:
dict_lock.release()
l.acquire()
try:
# do stuff
finally:
l.release()
If your function can be called with many different arguments, though, that amounts to a lot of locks. Probably a better way is to have a set of all arguments with which the function is currently executing, and have the contents of that set protected by a lock. I think this should work:
set_condition = threading.Condition()
current_args = set()
def fn_set(arg):
set_condition.acquire()
try:
while arg in current_args:
set_condition.wait()
current_args.add(arg)
finally:
set_condition.release()
# do stuff
set_condition.acquire()
try:
current_args.remove(arg)
set_condition.notifyAll()
finally:
set_condition.release()
It sounds like you want something similar to a Readers-Writer lock.
This is probably not what you want, but might be a clue:
from __future__ import with_statement
import threading
def RWLock(readers = 1, writers = 1):
m = _Monitor(readers, writers)
return (_RWLock(m.r_increment, m.r_decrement), _RWLock(m.w_increment, m.w_decrement))
class _RWLock(object):
def __init__(self, inc, dec):
self.inc = inc
self.dec = dec
def acquire(self):
self.inc()
def release(self):
self.dec()
def __enter__(self):
self.inc()
def __exit__(self):
self.dec()
class _Monitor(object):
def __init__(self, max_readers, max_writers):
self.max_readers = max_readers
self.max_writers = max_writers
self.readers = 0
self.writers = 0
self.monitor = threading.Condition()
def r_increment(self):
with self.monitor:
while self.writers > 0 and self.readers < self.max_readers:
self.monitor.wait()
self.readers += 1
self.monitor.notify()
def r_decrement(self):
with self.monitor:
while self.writers > 0:
self.monitor.wait()
assert(self.readers > 0)
self.readers -= 1
self.monitor.notify()
def w_increment(self):
with self.monitor:
while self.readers > 0 and self.writers < self.max_writers:
self.monitor.wait()
self.writers += 1
self.monitor.notify()
def w_decrement(self):
with self.monitor:
assert(self.writers > 0)
self.writers -= 1
self.monitor.notify()
if __name__ == '__main__':
rl, wl = RWLock()
wl.acquire()
wl.release()
rl.acquire()
rl.release()
(Unfortunately not tested)