The example below works fine, but when should you start using more than one pool for multithreading or multiprocessing in Python, and why? Or is it okay to always use the same pool?
from concurrent.futures import ThreadPoolExecutor, as_completed

def func_one():
    print("does something")

def func_two(some_results):
    print("does something related to func_one")

def func_three():
    print("does something totally different")

some_list = ["foo", "bar", "baz"]
thread_lst_one = []
thread_lst_two = []
thread_lst_three = []

with ThreadPoolExecutor() as executor:
    for foo in some_list:
        thread_lst_one.append(executor.submit(func_one))
    while True:  # placeholder loop
        for thread in as_completed(thread_lst_one):
            thread_lst_two.append(executor.submit(func_two, thread.result()))
        if True:  # placeholder condition
            executor.submit(func_three)
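One concrete reason to reach for a second pool, as a hedged sketch rather than a rule: tasks that block waiting on other tasks submitted to the same bounded pool can deadlock it, while giving the dependent stage its own pool avoids that. The outer/inner_pool names below are hypothetical:

from concurrent.futures import ThreadPoolExecutor

inner_pool = ThreadPoolExecutor(max_workers=2)

def outer():
    # Each outer task waits on an inner task. If outer and inner
    # shared one 2-worker pool, two outer tasks could occupy both
    # workers and wait forever on inner tasks that can never start.
    return inner_pool.submit(lambda: "inner done").result()

with ThreadPoolExecutor(max_workers=2) as outer_pool:
    futures = [outer_pool.submit(outer) for _ in range(4)]
    print([f.result() for f in futures])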
I'm using a decorator for the thread pool executor:
from functools import wraps
from .bounded_pool_executor import BoundedThreadPoolExecutor

_DEFAULT_POOL = BoundedThreadPoolExecutor(max_workers=5)

def threadpool(f, executor=None):
    @wraps(f)
    def wrap(*args, **kwargs):
        return (executor or _DEFAULT_POOL).submit(f, *args, **kwargs)
    return wrap
where the BoundedThreadPoolExecutor is defined here
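For context, a function decorated this way returns a future immediately rather than its result; a minimal sketch, where the add function is hypothetical and we assume BoundedThreadPoolExecutor.submit returns a standard concurrent.futures.Future:

@threadpool
def add(a, b):
    return a + b

future = add(1, 2)      # returns a Future immediately
print(future.result())  # blocks until the worker finishes -> 3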
When I try to use concurrent.futures in a function decorated with @threadpool and then wait for all the futures with as_completed, like
def get_results_as_completed(futures):
    # finished, pending = wait(futures, return_when=ALL_COMPLETED)
    futures_results = as_completed(futures)
    for f in futures_results:
        try:
            yield f.result()
        except:
            # note: a bare except silently swallows worker exceptions
            pass
for a worker defined like
from thread_support import threadpool
from time import sleep
from random import randint

@threadpool
def my_worker():
    res = {}
    # do something
    sleep(randint(1, 5))
    return res
if __name__ == "__main__":
    # assumed: futures are collected from calls to the decorated worker
    futures = [my_worker() for _ in range(10)]
    results = []
    futures_results = get_results_as_completed(futures)
    for r in futures_results:
        results.append(r)
I cannot get the futures to complete despite the .result() call, which results in an infinite loop over futures_results. Why?
I want to test the async_who function with pytest. How do I test that the callback is called and that the return value is 'Bob'?
import threading

def async_who(callback):
    t = threading.Thread(target=_who, args=(callback,))
    t.start()

def _who(callback):
    return callback('Bob')

def callback(name):
    print(name)
    return name

async_who(callback)
Because async_who doesn't return a value, I can't do this:
def test_async_who():
    res = async_who(callback)
    assert res == 'Bob'
ThreadPool from the multiprocessing module, or ThreadPoolExecutor (for Python >= 3.2), are ways to get the return value from a thread.
With concurrent.futures.ThreadPoolExecutor
from concurrent.futures import ThreadPoolExecutor

def async_who(callback):
    executor = ThreadPoolExecutor(max_workers=2)
    res = executor.submit(_who, callback)
    return res.result()

def _who(callback):
    return callback('Bob')

def callback(name):
    print(name)
    return name

def test_async_who():
    res = async_who(callback)
    assert res == 'Bob'
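A small variation on the above: creating the executor as a context manager shuts its worker threads down once the result is retrieved, rather than leaving the pool alive for the life of the process:

from concurrent.futures import ThreadPoolExecutor

def async_who(callback):
    # The with-block joins the pool's threads after result() returns.
    with ThreadPoolExecutor(max_workers=2) as executor:
        return executor.submit(_who, callback).result()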
With multiprocessing.pool.ThreadPool
from multiprocessing.pool import ThreadPool

pool = ThreadPool(processes=2)

def async_who(callback):
    res = pool.apply_async(_who, args=(callback,))
    return res.get()

def _who(callback):
    return callback('Bob')

def callback(name):
    print(name)
    return name

def test_async_who():
    res = async_who(callback)
    assert res == 'Bob'
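To also verify that the callback itself is invoked, one option is to wrap it in a mock; a sketch using unittest.mock, assuming the ThreadPoolExecutor version of async_who above:

from unittest.mock import Mock

def test_callback_is_called():
    # side_effect delegates to the real callback and preserves its
    # return value, while the Mock records how it was called.
    mock_cb = Mock(side_effect=callback)
    res = async_who(mock_cb)
    mock_cb.assert_called_once_with('Bob')
    assert res == 'Bob'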
I made a very simple example to see whether starmap can concurrently call add_func while iter_func yields new arguments at the same time, but unfortunately it doesn't work.
from functools import partial
from itertools import repeat
from multiprocessing import Pool, freeze_support
import time

def add_func(a, b):
    print(a, b)
    return a + b

def iter_func():
    i = 0
    while True:
        print("yield")
        yield i, i + 1
        i += 1
        time.sleep(1)

def main():
    with Pool() as pool:
        L = pool.starmap(add_func, iter_func())
        print(L)

if __name__ == "__main__":
    freeze_support()
    main()
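For what it's worth, Pool.starmap materializes the whole iterable into a list before dispatching any work, so an infinite generator never even starts the pool. One way to overlap production and consumption is to submit each yielded tuple with apply_async as it is produced; a sketch reusing the add_func and iter_func above, where the bound of 5 is an arbitrary choice for illustration:

from itertools import islice
from multiprocessing import Pool, freeze_support

def main():
    with Pool() as pool:
        # Submit each tuple as soon as iter_func yields it, so workers
        # run add_func while the generator keeps producing arguments.
        async_results = [pool.apply_async(add_func, args)
                         for args in islice(iter_func(), 5)]
        print([r.get() for r in async_results])

if __name__ == "__main__":
    freeze_support()
    main()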
For the following code, which passes an instance method to the Pool, the list is empty at the end of the script:
import time
from multiprocessing import Pool

class Hello:
    def __init__(self):
        self.result_list = []

    def f(self, x, y):
        time.sleep(2)
        return x * y

    def log_result(self, result):
        # This is called whenever h.f(i, i) returns a result.
        # result_list is modified only by the main process, not the pool workers.
        print(result)
        self.result_list.append(result)

if __name__ == '__main__':
    pool = Pool()  # start worker processes (one per CPU by default)
    h = Hello()
    for i in range(10):
        pool.apply_async(h.f, args=(i, i), callback=h.log_result)
    pool.close()
    pool.join()
    print(h.result_list)
With this code, the list is populated as expected.
import multiprocessing as mp
import time

def foo_pool(x):
    time.sleep(2)
    return x * x

result_list = []

def log_result(result):
    # This is called whenever foo_pool(i) returns a result.
    # result_list is modified only by the main process, not the pool workers.
    result_list.append(result)

def apply_async_with_callback():
    pool = mp.Pool()
    for i in range(10):
        pool.apply_async(foo_pool, args=(i,), callback=log_result)
    pool.close()
    pool.join()
    print(result_list)

if __name__ == '__main__':
    apply_async_with_callback()
What's different about the two? Why doesn't it work with the instance method?
If you actually try to fetch the result of one of your apply_async calls, you'll see that they're all failing with this error:
cPickle.PicklingError: Can't pickle <type 'instancemethod'>: attribute lookup __builtin__.instancemethod failed
This is because in Python 2.x, instance methods aren't picklable by default, so trying to pass the instance method h.f to the worker process fails. This is actually fixed in Python 3, but you can backport the behavior to Python 2 quite easily, using the copy_reg module:
import time
from multiprocessing import Pool
import copy_reg
import types

def _reduce_method(m):
    # Tell pickle to reconstruct a method via getattr on the instance
    # (or the class, for unbound methods) instead of pickling the
    # method object itself.
    if m.__self__ is None:
        return getattr, (m.__class__, m.__func__.__name__)
    else:
        return getattr, (m.__self__, m.__func__.__name__)

copy_reg.pickle(types.MethodType, _reduce_method)

class Hello:
    def __init__(self):
        self.result_list = []

    def f(self, x, y):
        time.sleep(2)
        return x * y

    def log_result(self, result):
        print(result)
        self.result_list.append(result)

if __name__ == '__main__':
    pool = Pool()
    h = Hello()
    for i in range(10):
        pool.apply_async(h.f, args=(i, i), callback=h.log_result)
    pool.close()
    pool.join()
    print(h.result_list)
Output:
0
4
49
25
1
16
36
9
64
81
[0, 4, 49, 25, 1, 16, 36, 9, 64, 81]
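For completeness: on Python 3, bound methods are picklable out of the box, so the first Hello example works without the copy_reg shim. A quick way to check, assuming the Hello class above is defined at module level:

import pickle

h = Hello()
pickle.dumps(h.f)  # succeeds on Python 3; raises PicklingError on
                   # Python 2 without the copy_reg workaround above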