I have a function with multiple arguments. I need a code block that will execute my function in parallel with 3 different sets of arguments and then wait until all the processes are done before running my next code block.
I tried this, but the result isn't the one I need.
import multiprocessing
from itertools import product

bs = []

def a(i):
    for x in range(i):
        print(i)
        b = x + 1
        bs.append(b)
    return bs

if __name__ == '__main__':
    i = range(4)
    with multiprocessing.Pool(processes=3) as pool:
        result = pool.starmap(a, product(i))
    print(result)
The result:
1
2
2
3
3
3
[[], [1], [1, 1, 2], [1, 1, 2, 1, 2, 3]]
I have read threads where people used Process from multiprocessing, which has .join(), but I don't understand them well enough to write code based on them.
Edit:
I also tried this and get an error:
from multiprocessing import Process

bs = []

def a(i):
    for x in range(i):
        print(i)
        b = x + 1
        bs.append(b)
    return bs

if __name__ == '__main__':
    p1 = Process(target=a(2))
    p1.start()
    p2 = Process(target=a(3))
    p2.start()
    p1.join()
    p2.join()
The result:
2
2
3
3
3
Process Process-1:
Traceback (most recent call last):
File "E:\Python38-32\lib\multiprocessing\process.py", line 315, in _bootstrap
self.run()
File "E:\Python38-32\lib\multiprocessing\process.py", line 108, in run
self._target(*self._args, **self._kwargs)
TypeError: 'list' object is not callable
Process Process-2:
Traceback (most recent call last):
File "E:\Python38-32\lib\multiprocessing\process.py", line 315, in _bootstrap
self.run()
File "E:\Python38-32\lib\multiprocessing\process.py", line 108, in run
self._target(*self._args, **self._kwargs)
TypeError: 'list' object is not callable
Since multiprocessing is a bit complicated, can you help me solve this?
Thank you.
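For reference, here is a minimal sketch of the Process/.join() pattern mentioned above, assuming the goal is simply to launch the function with three fixed argument sets and block until all of them finish. The argument values and the print are illustrative, not the original code:

from multiprocessing import Process

def a(i):
    # each process runs independently; to get results back into the parent,
    # use a Queue, Pipe, or Manager rather than a module-level list
    for x in range(i):
        print(i)

if __name__ == '__main__':
    # one Process per argument set; the target is the function object itself,
    # and the arguments go in args=(...) -- target=a(2) calls a(2) immediately
    # and passes its return value (a list) as the target, hence the TypeError above
    processes = [Process(target=a, args=(n,)) for n in (2, 3, 4)]
    for p in processes:
        p.start()
    for p in processes:
        p.join()  # blocks here until every process has finished
    print("all processes done, next code block can run")

Note that args can hold as many positional arguments as the function takes, so a function with multiple arguments works the same way.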
Related
I have the following problem. I am running a parallel task and I am getting this error:
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "eclat_model.py", line 127, in do_work
function(*args, work_queue, valid_list)
File "eclat_model.py", line 115, in eclat_parallel_helper
valid_list.extend(next_vectors)
File "<string>", line 2, in extend
File "/usr/lib/python3.8/multiprocessing/managers.py", line 834, in _callmethod
conn.send((self._id, methodname, args, kwds))
File "/usr/lib/python3.8/multiprocessing/connection.py", line 206, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "/usr/lib/python3.8/multiprocessing/connection.py", line 404, in _send_bytes
self._send(header)
File "/usr/lib/python3.8/multiprocessing/connection.py", line 368, in _send
n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
Relevant functions in eclat_model.py look like this:
def eclat_parallel_helper(index, bit_vectors, min_support, work_queue, valid_list):
    next_vectors = []
    for j in range(index + 1, len(bit_vectors)):
        item_vector = bit_vectors[index][0] | bit_vectors[j][0]
        transaction_vector = bit_vectors[index][1] & bit_vectors[j][1]
        support = get_vector_support(transaction_vector)
        if support >= min_support:
            next_vectors.append((item_vector, transaction_vector, support))
    if len(next_vectors) > 0:
        valid_list.extend(next_vectors)
        for i in range(len(next_vectors)):
            work_queue.put((eclat_parallel_helper, (i, next_vectors, min_support)))
def do_work(work_queue, valid_list, not_done):
    # work queue entries have the form (function, args)
    while not_done.value:
        try:
            function, args = work_queue.get_nowait()
        except QueueEmptyError:
            continue
        function(*args, work_queue, valid_list)
        work_queue.task_done()
    work_queue.close()
EDIT:
The multiprocessing part of the code is as follows. bit_vectors is a list of lists, where each entry is of the form
[items, transactions, support]: items is a bit vector encoding which items appear in the itemset, transactions is a bit vector encoding which transactions the itemset appears in, and support is the number of transactions in which the itemset occurs.
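To make the structure concrete, here is a small made-up entry, assuming the bit vectors are plain Python integers used as bitmasks (which is consistent with the | and & operations in eclat_parallel_helper above); the actual encoding in the original code may differ:

# hypothetical data, for illustration only
bit_vectors = [
    # [items, transactions, support]
    [0b011, 0b1010, 2],  # itemset {0, 1}, occurs in transactions 1 and 3, support 2
    [0b100, 0b0110, 2],  # itemset {2},    occurs in transactions 1 and 2, support 2
]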
from multiprocessing import Process, JoinableQueue, Manager, Value, cpu_count

def eclat_parallel(bit_vectors, min_support):
    not_done = Value('i', 1)
    manager = Manager()
    valid_list = manager.list()
    work_queue = JoinableQueue()
    for i in range(len(bit_vectors)):
        work_queue.put((eclat_parallel_helper, (i, bit_vectors, min_support)))

    processes = []
    for i in range(cpu_count()):
        p = Process(target=do_work, args=(work_queue, valid_list, not_done), daemon=True)
        p.start()
        processes.append(p)

    work_queue.join()
    not_done.value = 0
    work_queue.close()

    valid_itemset_vectors = bit_vectors
    for element in valid_list:
        valid_itemset_vectors.append(element)

    for p in processes:
        p.join()

    return valid_itemset_vectors
What does this error mean, please? Am I appending too many elements to the next_vectors list?
I had the same issue; in my case, adding a short delay (time.sleep(0.01)) solved it.
The problem is that the individual processes hit the queue too quickly, and that is what causes the error.
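As a rough illustration of that suggestion, the delay would go inside the worker loop from do_work above, so each worker backs off briefly instead of hammering the queue. The QueueEmptyError import and the exact placement and duration of the sleeps are assumptions, not taken from the original code:

import time
from queue import Empty as QueueEmptyError  # assumed import for the exception name used above

def do_work(work_queue, valid_list, not_done):
    # work queue entries have the form (function, args)
    while not_done.value:
        try:
            function, args = work_queue.get_nowait()
        except QueueEmptyError:
            time.sleep(0.01)  # back off briefly instead of spinning on an empty queue
            continue
        function(*args, work_queue, valid_list)
        work_queue.task_done()
        time.sleep(0.01)  # small pause between tasks, as the comment above suggests
    work_queue.close()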
I'm writing a program that spawns a process and restarts it under certain conditions. For example, if a child process stops sending data to the mother process for a certain period of time, I want the mother process to terminate the child process and restart it. I thought I could use a thread to receive data from the child process and restart the child process, but it doesn't work the way I thought.
import numpy as np
import multiprocessing as mp
import threading
import time
from apscheduler.schedulers.background import BackgroundScheduler

pipe_in, pipe_out = mp.Pipe()

class Mother():
    def __init__(self):
        self.pipe_out = pipe_out
        self.proc = mp.Process(target=self.test_func, args=(pipe_in, ))
        self.proc.start()
        self.thread = threading.Thread(target=self.thread_reciever, args=(self.pipe_out, ))
        self.thread.start()

    def thread_reciever(self, pipe_out):
        while True:
            value = pipe_out.recv()
            print(value)
            if value == 5:
                self.proc.terminate()
                time.sleep(2)
                self.proc = mp.Process(target=self.test_func)
                self.proc.start()

    def test_func(self, pipe_in):
        for i in range(10):
            pipe_in.send(i)
            time.sleep(1)

if __name__ == '__main__':
    r = Mother()
It prints out this error.
D:\>d:\python36-32\python.exe temp06.py
0
1
2
3
4
5
Exception in thread Thread-1:
Traceback (most recent call last):
File "d:\python36-32\lib\threading.py", line 916, in _bootstrap_inner
self.run()
File "d:\python36-32\lib\threading.py", line 864, in run
self._target(*self._args, **self._kwargs)
File "temp06.py", line 28, in thread_reciever
self.proc.start()
File "d:\python36-32\lib\multiprocessing\process.py", line 105, in start
self._popen = self._Popen(self)
File "d:\python36-32\lib\multiprocessing\context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "d:\python36-32\lib\multiprocessing\context.py", line 322, in _Popen
return Popen(process_obj)
File "d:\python36-32\lib\multiprocessing\popen_spawn_win32.py", line 65, in __init__
reduction.dump(process_obj, to_child)
File "d:\python36-32\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: can't pickle _thread.lock objects
D:\>Traceback (most recent call last):
File "<string>", line 1, in <module>
File "d:\python36-32\lib\multiprocessing\spawn.py", line 99, in spawn_main
new_handle = reduction.steal_handle(parent_pid, pipe_handle)
File "d:\python36-32\lib\multiprocessing\reduction.py", line 82, in steal_handle
_winapi.PROCESS_DUP_HANDLE, False, source_pid)
OSError: [WinError 87]
How can I start and terminate a process inside a thread? (I'm using a thread because it can synchronously receive data from a different process.) Or is there another way to do this job?
test_func as a global function
import numpy as np
import multiprocessing as mp
import threading
import time
from apscheduler.schedulers.background import BackgroundScheduler

pipe_in, pipe_out = mp.Pipe()

def test_func(pipe_in):
    for i in range(10):
        pipe_in.send(i)
        time.sleep(1)

class Mother():
    def __init__(self):
        self.pipe_out = pipe_out
        mp.freeze_support()
        self.proc = mp.Process(target=test_func, args=(pipe_in, ))
        self.proc.start()
        self.thread = threading.Thread(target=self.thread_reciever, args=(self.pipe_out, ))
        self.thread.start()

    def thread_reciever(self, pipe_out):
        while True:
            value = pipe_out.recv()
            print(value)
            if value == 5:
                self.proc.terminate()
                time.sleep(2)
                mp.freeze_support()
                self.proc = mp.Process(target=test_func, args=(pipe_in,))
                self.proc.start()

if __name__ == '__main__':
    r = Mother()
OUTPUT
D:\> d:\python36-32\python.exe temp06.py
0
1
2
3
4
5
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "d:\python36-32\lib\multiprocessing\spawn.py", line 105, in spawn_main
exitcode = _main(fd)
File "d:\python36-32\lib\multiprocessing\spawn.py", line 115, in _main
self = reduction.pickle.load(from_parent)
AttributeError: Can't get attribute 'test_func' on <module '__main__' (built-in)>
Under Windows, since there is no fork syscall, Python starts a new interpreter instance and uses pickle/unpickle to reconstruct the execution context in the child, but a thread lock is not picklable. Pickling the bound method self.test_func pickles self as well, and self.thread references a thread lock object, which makes the whole thing unpicklable.
You could simply change test_func to a plain global function, with no reference to the thread object:
self.proc = mp.Process(target=test_func, args=(pipe_in,))
...

def test_func(pipe_in):
    for i in range(10):
        pipe_in.send(i)
        time.sleep(1)
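A minimal, self-contained sketch of that suggestion follows. It is an illustration, not the original program: the unused apscheduler import is dropped, the pipe is created under the __main__ guard and passed in explicitly, and the restart logic is kept as simple as possible:

import multiprocessing as mp
import threading
import time

def test_func(pipe_in):
    # plain module-level function: picklable, no reference to the Mother instance
    for i in range(10):
        pipe_in.send(i)
        time.sleep(1)

class Mother():
    def __init__(self, pipe_in, pipe_out):
        self.pipe_in = pipe_in
        self.pipe_out = pipe_out
        self.proc = mp.Process(target=test_func, args=(pipe_in,))
        self.proc.start()
        self.thread = threading.Thread(target=self.thread_receiver, args=(pipe_out,))
        self.thread.start()

    def thread_receiver(self, pipe_out):
        while True:
            value = pipe_out.recv()
            print(value)
            if value == 5:
                self.proc.terminate()
                time.sleep(2)
                # restart with the same target and arguments
                self.proc = mp.Process(target=test_func, args=(self.pipe_in,))
                self.proc.start()

if __name__ == '__main__':
    pipe_in, pipe_out = mp.Pipe()
    r = Mother(pipe_in, pipe_out)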
I'm playing with multiprocessing in Python. I'm trying to determine what happens if a worker raises an exception, so I wrote the following code:
from multiprocessing import Pool

def a(num):
    if(num == 2):
        raise Exception("num can't be 2")
    print(num)

p = Pool()
p.map(a, [2, 1, 3, 4, 5, 6, 7, 100, 100000000000000, 234, 234, 5634, 0000])
output
3
4
5
7
6
100
100000000000000
234
234
5634
0
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "/usr/lib/python3.5/multiprocessing/pool.py", line 119, in worker
result = (True, func(*args, **kwds))
File "/usr/lib/python3.5/multiprocessing/pool.py", line 44, in mapstar
return list(map(*args))
File "<stdin>", line 3, in a
Exception: Error, num can't be 2
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python3.5/multiprocessing/pool.py", line 260, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "/usr/lib/python3.5/multiprocessing/pool.py", line 608, in get
raise self._value
Exception: Error, num can't be 2
As you can see in the numbers that were printed, 2 is not there, but why is the number 1 not there either?
Note: I'm using Python 3.5.2 on Ubuntu
By default, Pool creates a number of worker processes equal to your number of cores. When one of those worker processes dies, it may leave work that has been assigned to it undone, and it may also leave output in a buffer that never gets flushed. In addition, depending on the chunk size, .map() may have handed 2 and 1 to the same worker as a single chunk (note mapstar in the traceback), in which case the exception on 2 means the rest of that chunk, including 1, is never run.
The pattern with .map() is to handle exceptions in the workers and return some suitable error value, since the results of .map() are supposed to be one-to-one with the input.
from multiprocessing import Pool

def a(num):
    try:
        if(num == 2):
            raise Exception("num can't be 2")
        print(num, flush=True)
        return num
    except Exception as e:
        print('failed', flush=True)
        return e

p = Pool()
n = 100
results = p.map(a, range(n))
print("missing numbers: ", tuple(i for i in range(n) if i not in results))
Here's another question with good information about how exceptions propagate in multiprocessing.map workers.
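For reference, here is a tiny sketch of what that propagation looks like from the caller's side: the exception raised in a worker is sent back to the parent and re-raised by .map(), so it can be caught there. This is an illustrative example, not code from the linked question:

from multiprocessing import Pool

def work(num):
    if num == 2:
        raise ValueError("num can't be 2")
    return num

if __name__ == '__main__':
    with Pool() as p:
        try:
            results = p.map(work, range(5))
        except ValueError as e:
            # the worker's exception is re-raised here in the parent process
            print("a worker failed:", e)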
I get an error when trying to run a command in parallel with joblib/multiprocessing. Here is the traceback:
Process PoolWorker-263:
Traceback (most recent call last):
File "/home/marcel/anaconda/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/home/marcel/anaconda/lib/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "/home/marcel/anaconda/lib/python2.7/multiprocessing/pool.py", line 102, in worker
task = get()
File "/home/marcel/.local/lib/python2.7/site-packages/joblib/pool.py", line 363, in get
File "_objects.pyx", line 240, in h5py._objects.ObjectID.__cinit__ (h5py/_objects.c:2994)
TypeError: __cinit__() takes exactly 1 positional argument (0 given)
As you can see from the error message, I work with data loaded using h5py. To complicate things further, the routine I want to parallelize uses numba in one of its subroutines, but I hope that does not matter.
Here is a running example, which you can copy and paste:
from joblib import Parallel, delayed
import numpy as np
import h5py as h5
import os

def testfunc(h5data, row):
    # some very boneheaded CPU work
    data_slice = h5data[:, row, ...]
    ma = np.mean(data_slice, axis=1)
    x = row
    return ma, x

def run():
    data = np.random.random((100, 100, 100))
    print data
    f_out = h5.File('tmp.h5', 'w')
    dset = f_out.create_dataset('mydata', data=data)
    f_out.close()

    f_in = h5.File('tmp.h5', 'r')
    h5data = f_in['mydata']
    pool = Parallel(n_jobs=-1, verbose=1, pre_dispatch='all')
    results = pool(delayed(testfunc)(h5data, i) for i in range(h5data.shape[1]))
    f_in.close()
    os.remove('tmp.h5')

if __name__ == '__main__':
    run()
Any ideas, what I'm doing wrong?
Edit: Okay at least I can exclude numba from the list of evildoers...
You can try to replace joblib with pathos, which replaces pickle with dill. This generally solves all pickling issues.
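As a rough illustration of that suggestion (assuming tmp.h5 has been written as in the example above; whether dill can actually serialize the open h5py dataset would still need to be verified), the joblib pool could be swapped for a pathos pool roughly like this:

from pathos.multiprocessing import ProcessingPool
import numpy as np
import h5py as h5

def testfunc(h5data, row):
    return np.mean(h5data[:, row, ...], axis=1), row

if __name__ == '__main__':
    f_in = h5.File('tmp.h5', 'r')   # file created as in the example above
    h5data = f_in['mydata']
    pool = ProcessingPool(nodes=4)
    # pathos serializes the tasks with dill instead of pickle
    results = pool.map(lambda i: testfunc(h5data, i), range(h5data.shape[1]))
    f_in.close()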
I have a two-dimensional function and I want to evaluate it on grid points, but the two loops over rows and columns are very slow, so I want to use multiprocessing to speed the code up. I have written the following code to do the two loops:
from multiprocessing import Pool
import numpy as np

#Grid points
ra = np.linspace(25.1446, 25.7329, 1000)
dec = np.linspace(-10.477, -9.889, 1000)

#The 2D function
def like2d(x, y):
    stuff = [RaDec, beta, rho_c_over_sigma_c, zhalo, rho_crit]
    m = 3e14
    c = 7.455
    param = [x, y, m, c]
    return reduced_shear(param, stuff, observed_g, g_err)

pool = Pool(processes=12)

def data_stream(a, b):
    for i, av in enumerate(a):
        for j, bv in enumerate(b):
            yield (i, j), (av, bv)

def myfunc(args):
    return args[0], like2d(*args[1])

counter, likelihood = pool.map(myfunc, data_stream(ra, dec))
But I got the following error message:
Process PoolWorker-1:
Traceback (most recent call last):
File "/user/anaconda/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/user/anaconda/lib/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "/user/anaconda/lib/python2.7/multiprocessing/pool.py", line 102, in worker
task = get()
File "/user/anaconda/lib/python2.7/multiprocessing/queues.py", line 376, in get
return recv()
AttributeError: 'module' object has no attribute 'myfunc'
Process PoolWorker-2:
Traceback (most recent call last):
File "/user/anaconda/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/user/anaconda/lib/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "/user/anaconda/lib/python2.7/multiprocessing/pool.py", line 102, in worker
task = get()
File "/user/anaconda/lib/python2.7/multiprocessing/queues.py", line 376, in get
return recv()
AttributeError: 'module' object has no attribute 'myfunc'
Process PoolWorker-3:
Traceback (most recent call last):
File "/user/anaconda/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/user/anaconda/lib/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "/user/anaconda/lib/python2.7/multiprocessing/pool.py", line 102, in worker
task = get()
File "/user/anaconda/lib/python2.7/multiprocessing/queues.py", line 376, in get
return recv()
AttributeError: 'module' object has no attribute 'myfunc'
Process PoolWorker-4:
Everything is defined, and I do not understand why this error message is raised. Can anybody point out what might be wrong?
Another approach is to do the loops with multiprocessing and save the results in a 2D array:
import ctypes
import time
import multiprocessing
import numpy as np

#Grid points
ra = np.linspace(25.1446, 25.7329, 1000)
dec = np.linspace(-10.477, -9.889, 1000)

#The 2D function
def like2d(x, y):
    stuff = [RaDec, beta, rho_c_over_sigma_c, zhalo, rho_crit]
    m = 3e14
    c = 7.455
    param = [x, y, m, c]
    return reduced_shear(param, stuff, observed_g, g_err)

shared_array_base = multiprocessing.Array(ctypes.c_double, ra.shape[0]*dec.shape[0])
shared_array = np.ctypeslib.as_array(shared_array_base.get_obj())
shared_array = shared_array.reshape(ra.shape[0], dec.shape[0])

# Parallel processing
def my_func(i, def_param=shared_array):
    shared_array[i,:] = np.array([float(like2d(ra[j], dec[i])) for j in range(ra.shape[0])])

print "processing to estimate likelihood in 2D grids......!!!"
start = time.time()
pool = multiprocessing.Pool(processes=12)
pool.map(my_func, range(dec.shape[0]))
print shared_array

end = time.time()
print end - start
You have to create the Pool after the worker function (myfunc) definition. Creating the Pool causes Python to fork your worker processes right at that point, and the only things that will be defined in the children are the functions defined above the Pool definition. Also, map will return a list of tuples (one for each object yielded by data_stream), not a single tuple. So you need this:
from multiprocessing import Pool
import numpy as np

#Grid points
ra = np.linspace(25.1446, 25.7329, 1000)
dec = np.linspace(-10.477, -9.889, 1000)

#The 2D function
def like2d(x, y):
    stuff = [RaDec, beta, rho_c_over_sigma_c, zhalo, rho_crit]
    m = 3e14
    c = 7.455
    param = [x, y, m, c]
    return reduced_shear(param, stuff, observed_g, g_err)

def data_stream(a, b):
    for i, av in enumerate(a):
        for j, bv in enumerate(b):
            yield (i, j), (av, bv)

def myfunc(args):
    return args[0], like2d(*args[1])

if __name__ == "__main__":
    pool = Pool(processes=12)
    results = pool.map(myfunc, data_stream(ra, dec))  # results is a list of tuples.
    for counter, likelihood in results:
        print("counter: {}, likelihood: {}".format(counter, likelihood))
I added the if __name__ == "__main__": guard, which isn't necessary on POSIX platforms, but would be necessary on Windows (which doesn't support os.fork()).