I have a program in which multiple processes try to finish some function. My aim is to stop all the other processes once one process has successfully finished the function.
The Python program shown below unfortunately waits until all the processes have solved the problem posed in the find function. How can I fix this?
import multiprocessing
import random
FIND = 50
MAX_COUNT = 100000
INTERVAL = range(10)
def find(process, initial, return_dict):
succ = False
while succ == False:
start=initial
while(start <= MAX_COUNT):
if(FIND == start):
return_dict[process] = f"Found: {process}, start: {initial}"
succ = True
break;
i = random.choice(INTERVAL)
start = start + i
print(start)
processes = []
manager = multiprocessing.Manager()
return_code = manager.dict()
for i in range(5):
process = multiprocessing.Process(target=find, args=(f'computer_{i}', i, return_code))
processes.append(process)
process.start()
for process in processes:
process.join()
print(return_code.values())
The output can be, for example:
['Found: computer_0, start: 0', 'Found: computer_4, start: 4', 'Found: computer_2, start: 2', 'Found: computer_1, start: 1', 'Found: computer_3, start: 3']
But this output shows that the program waits until all processes are finished ...
Use an Event to govern whether the processes should keep running.
Basically, it replaces succ with something that works across all processes.
import multiprocessing
import random
FIND = 50
MAX_COUNT = 1000
def find(process, initial, return_dict, run):
while run.is_set():
start = initial
while start <= MAX_COUNT:
if FIND == start:
return_dict[process] = f"Found: {process}, start: {initial}"
run.clear() # Stop running.
break
start += random.randrange(0, 10)
print(start)
if __name__ == "__main__":
processes = []
manager = multiprocessing.Manager()
return_code = manager.dict()
run = manager.Event()
run.set() # We should keep running.
for i in range(5):
process = multiprocessing.Process(
target=find, args=(f"computer_{i}", i, return_code, run)
)
processes.append(process)
process.start()
for process in processes:
process.join()
print(return_code.values())
Note that the __name__ == "__main__" guard is mandatory for multiprocessing to work properly on MS Windows and macOS.
On those systems, the main module is imported into newly created Python processes. This import must happen without side effects such as starting a process, and the __name__ mechanism ensures that.
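As a quick illustration (my own sketch, not part of the original answer), you can force the spawn start method explicitly, which exercises the same import behaviour even on Linux; the work function and process count here are just placeholders:
import multiprocessing

def work(i):
    # Placeholder for the find() function above.
    print(f"child {i} running")

if __name__ == "__main__":
    # Force the start method used on Windows/macOS, so the guard is
    # exercised even where fork is the default.
    multiprocessing.set_start_method("spawn")
    procs = [multiprocessing.Process(target=work, args=(i,)) for i in range(3)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()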
You can do this using multiprocessing.Queue and multiprocessing.Queue.get. The way this works is that get blocks by default until there is something in the queue, so it returns the first result appended to the queue, i.e. from whichever process finishes the search first. After that, we can iterate over the processes and terminate each one (note that terminating a process doesn't kill child processes spawned by that process unless daemon is set to True).
import multiprocessing
import random
import time
FIND = 50
MAX_COUNT = 100000
INTERVAL = range(10)
queue = multiprocessing.Queue(maxsize=1)
def find(process, initial):
succ = False
while succ == False:
start=initial
while(start <= MAX_COUNT):
            if(FIND == start):
                queue.put(f"Found: {process}, start: {initial}")
                succ = True
                break
i = random.choice(INTERVAL)
start = start + i
print(process, start)
processes = []
manager = multiprocessing.Manager()
for i in range(5):
process = multiprocessing.Process(target=find, args=(f'computer_{i}', i))
processes.append(process)
process.start()
ret = queue.get()
for i in range(5):
process = processes[i]
process.terminate()
print(f'terminated {i}')
print(ret)
You might also want to look into setting daemon=True, which kills the child processes once the main process exits.
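A minimal sketch of that idea, assuming a placeholder search_forever function in place of find:
import multiprocessing
import time

def search_forever():
    # Stands in for the find() loop above; runs until killed.
    while True:
        time.sleep(0.5)

if __name__ == "__main__":
    workers = [multiprocessing.Process(target=search_forever, daemon=True)
               for _ in range(5)]
    for w in workers:
        w.start()
    time.sleep(1)
    # No terminate()/join() needed here: because daemon=True, the children
    # are terminated automatically when the main process exits.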
I want to kill a certain process, or all processes, in the middle of execution. My sample code is as follows; how can I do that? Here, I want to kill the processes based on the current time: if the time is divisible by 2, I want to kill the processes, otherwise not.
import time
from multiprocessing import Process
def runTests(a, b):
time.sleep(10)
return a + b
def main(kill_processes):
print(kill_processes)
processes = []
for i in range(3):
print(i)
proc = Process(target=runTests, args=(2, 4,))
processes.append(proc)
proc.start()
for proc in processes:
proc.join()
if kill_processes:
print("killing")
proc.terminate()
if __name__ == "__main__":
if round(time.time()) % 2 == 0:
main(True)
else:
main(False)
This illustrates how to do it using multiple threads, in a manner similar to the answer to the question multiprocessing in stoppable multithreading that I suggested you have a look at.
Basically, all the join() calls are done in separate threads, so they won't block the main thread in the main process, which makes it possible for it to kill the processes.
import threading
import time
from multiprocessing import Process
def runTests(a, b):
time.sleep(10)
return a + b
def create_process(lock, i):
proc = Process(target=runTests, args=(2, 4,))
print(f'{proc.name} created')
proc.start()
with lock:
processes.append(proc)
proc.join()
def main(kill_processes):
global processes
N = 3
lock = threading.RLock()
processes = []
print(f'main({kill_processes=})')
for i in range(N):
thread = threading.Thread(target=create_process, args=(lock, i))
thread.start()
while True: # Wait for all processes to have been created.
with lock:
if len(processes) == N:
break
else:
time.sleep(.001)
if kill_processes:
print("Killing the processes")
for proc in processes:
proc.terminate()
print(f'process {proc} terminated')
if __name__ == "__main__":
main(True)
# if round(time.time()) % 2 == 0:
# main(True)
# else:
# main(False)
You can use multiprocessing.Event to signal the termination condition to your child processes. Don't join the child processes in the main process. Instead, let the main process and child processes run in their own loops. Check the termination condition in the main loop, and signal it to the child processes via the multiprocessing.Event.
The Event object is passed as an argument to each child process. Each child continuously checks whether the event is set and stops its work if so. The main loop checks the termination condition and sets the Event when the condition is met (in the example below, the main loop waits for Ctrl+C).
import multiprocessing as mp
import os
import time
def do_work(a, b, stop_event):
while not stop_event.is_set():
try:
time.sleep(2)
print(f"worker {os.getpid()}: working ...", a + b)
a += 1
b += 1
except KeyboardInterrupt:
print(f"worker {os.getpid()}: received SIGINT. ignore.")
pass
print(f"worker {os.getpid()}: stop_event is set. exit.")
if __name__ == "__main__":
stop_event = mp.Event()
procs = []
for i in range(3):
# p = mp.Process(target=do_work, args=(1, 2, stop_event), daemon=True)
p = mp.Process(target=do_work, args=(1, 2, stop_event))
p.start()
procs.append(p)
while True:
try:
print("main: waiting for termination signal")
time.sleep(1)
except KeyboardInterrupt:
print("main: received termination signal")
stop_event.set()
# wait for the processes to stop
for p in procs:
p.join()
for p in procs:
print(f"worker {p.pid} is terminated: {not p.is_alive()}")
# exit the main loop
break
print("main: bye")
If you want to terminate based on time, use the timeout parameter of join. One way is to set a stop time and, as each process is joined, use the time remaining as its timeout.
import time
from multiprocessing import Process
def runTests(a, b):
time.sleep(10)
return a + b
def main(kill_processes):
print(kill_processes)
processes = []
end = time.time() + 10 # wait 10 seconds
for i in range(3):
print(i)
proc = Process(target=runTests, args=(2, 4,))
processes.append(proc)
proc.start()
for proc in processes:
if kill_processes:
delta = end - time.time()
else:
delta = None
proc.join(delta)
if kill_processes:
print("killing")
proc.terminate()
proc.join(1)
if proc.is_alive():
proc.kill()
if __name__ == "__main__":
if round(time.time()) % 2 == 0:
main(True)
else:
main(False)
In the example code below, I'd like to get the return value of the function worker. How can I go about doing this? Where is this value stored?
Example Code:
import multiprocessing
def worker(procnum):
'''worker function'''
    print(str(procnum) + ' represent!')
return procnum
if __name__ == '__main__':
jobs = []
for i in range(5):
p = multiprocessing.Process(target=worker, args=(i,))
jobs.append(p)
p.start()
for proc in jobs:
proc.join()
    print(jobs)
Output:
0 represent!
1 represent!
2 represent!
3 represent!
4 represent!
[<Process(Process-1, stopped)>, <Process(Process-2, stopped)>, <Process(Process-3, stopped)>, <Process(Process-4, stopped)>, <Process(Process-5, stopped)>]
I can't seem to find the relevant attribute in the objects stored in jobs.
Use a shared variable to communicate. For example:
import multiprocessing
def worker(procnum, return_dict):
"""worker function"""
print(str(procnum) + " represent!")
return_dict[procnum] = procnum
if __name__ == "__main__":
manager = multiprocessing.Manager()
return_dict = manager.dict()
jobs = []
for i in range(5):
p = multiprocessing.Process(target=worker, args=(i, return_dict))
jobs.append(p)
p.start()
for proc in jobs:
proc.join()
print(return_dict.values())
I think the approach suggested by @sega_sai is the better one, but it really needs a code example, so here goes:
import multiprocessing
from os import getpid
def worker(procnum):
print('I am number %d in process %d' % (procnum, getpid()))
return getpid()
if __name__ == '__main__':
pool = multiprocessing.Pool(processes = 3)
print(pool.map(worker, range(5)))
Which will print the return values:
I am number 0 in process 19139
I am number 1 in process 19138
I am number 2 in process 19140
I am number 3 in process 19139
I am number 4 in process 19140
[19139, 19138, 19140, 19139, 19140]
If you are familiar with map (the Python 2 built-in), this should not be too challenging. Otherwise have a look at sega_sai's link.
Note how little code is needed. (Also note how processes are re-used).
For anyone else who is seeking how to get a value from a Process using Queue:
import multiprocessing
ret = {'foo': False}
def worker(queue):
ret = queue.get()
ret['foo'] = True
queue.put(ret)
if __name__ == '__main__':
queue = multiprocessing.Queue()
queue.put(ret)
p = multiprocessing.Process(target=worker, args=(queue,))
p.start()
p.join()
print(queue.get()) # Prints {"foo": True}
Note that on Windows or in a Jupyter Notebook, with multiprocessing you have to save this as a file and execute the file. If you run it interactively instead, you will see an error like this:
AttributeError: Can't get attribute 'worker' on <module '__main__' (built-in)>
For some reason, I couldn't find a general example of how to do this with Queue anywhere (even Python's doc examples don't spawn multiple processes), so here's what I got working after like 10 tries:
from multiprocessing import Process, Queue
def add_helper(queue, arg1, arg2): # the func called in child processes
ret = arg1 + arg2
queue.put(ret)
def multi_add(): # spawns child processes
q = Queue()
processes = []
rets = []
for _ in range(0, 100):
p = Process(target=add_helper, args=(q, 1, 2))
processes.append(p)
p.start()
for p in processes:
ret = q.get() # will block
rets.append(ret)
for p in processes:
p.join()
return rets
Queue is a blocking, thread-safe queue that you can use to store the return values from the child processes. So you have to pass the queue to each process. Something less obvious here is that you have to get() from the queue before you join the Processes or else the queue fills up and blocks everything.
Update for those who are object-oriented (tested in Python 3.4):
from multiprocessing import Process, Queue
class Multiprocessor():
def __init__(self):
self.processes = []
self.queue = Queue()
    @staticmethod
def _wrapper(func, queue, args, kwargs):
ret = func(*args, **kwargs)
queue.put(ret)
def run(self, func, *args, **kwargs):
args2 = [func, self.queue, args, kwargs]
p = Process(target=self._wrapper, args=args2)
self.processes.append(p)
p.start()
def wait(self):
rets = []
for p in self.processes:
ret = self.queue.get()
rets.append(ret)
for p in self.processes:
p.join()
return rets
# tester
if __name__ == "__main__":
mp = Multiprocessor()
num_proc = 64
for _ in range(num_proc): # queue up multiple tasks running `sum`
mp.run(sum, [1, 2, 3, 4, 5])
ret = mp.wait() # get all results
print(ret)
assert len(ret) == num_proc and all(r == 15 for r in ret)
This example shows how to use a list of multiprocessing.Pipe instances to return strings from an arbitrary number of processes:
import multiprocessing
def worker(procnum, send_end):
'''worker function'''
result = str(procnum) + ' represent!'
    print(result)
send_end.send(result)
def main():
jobs = []
pipe_list = []
for i in range(5):
recv_end, send_end = multiprocessing.Pipe(False)
p = multiprocessing.Process(target=worker, args=(i, send_end))
jobs.append(p)
pipe_list.append(recv_end)
p.start()
for proc in jobs:
proc.join()
result_list = [x.recv() for x in pipe_list]
    print(result_list)
if __name__ == '__main__':
main()
Output:
0 represent!
1 represent!
2 represent!
3 represent!
4 represent!
['0 represent!', '1 represent!', '2 represent!', '3 represent!', '4 represent!']
This solution uses fewer resources than a multiprocessing.Queue which uses
a Pipe
at least one Lock
a buffer
a thread
or a multiprocessing.SimpleQueue which uses
a Pipe
at least one Lock
It is very instructive to look at the source for each of these types.
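As a rough sketch (not from the original answer), the same result collection can also be done with multiprocessing.SimpleQueue, which the comparison above lists as the lighter of the two queue types; the worker body is just the example from the question:
import multiprocessing

def worker(procnum, queue):
    '''worker function'''
    result = str(procnum) + ' represent!'
    print(result)
    queue.put((procnum, result))

if __name__ == '__main__':
    queue = multiprocessing.SimpleQueue()
    jobs = [multiprocessing.Process(target=worker, args=(i, queue)) for i in range(5)]
    for p in jobs:
        p.start()
    # Drain the queue before joining: one get() per started process.
    results = [queue.get() for _ in jobs]
    for p in jobs:
        p.join()
    print(results)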
It seems that you should use the multiprocessing.Pool class instead, and use its .apply(), .apply_async(), or .map() methods:
http://docs.python.org/library/multiprocessing.html?highlight=pool#multiprocessing.pool.AsyncResult
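Since that answer gives no code, here is a hedged sketch of what it might look like with apply_async; the worker function is just the one from the question, and the pool size and timeout are arbitrary choices of mine:
import multiprocessing

def worker(procnum):
    '''worker function'''
    print(str(procnum) + ' represent!')
    return procnum

if __name__ == '__main__':
    with multiprocessing.Pool(processes=3) as pool:
        # apply_async returns an AsyncResult; .get() blocks until the
        # result is ready (optionally with a timeout).
        async_results = [pool.apply_async(worker, (i,)) for i in range(5)]
        print([res.get(timeout=10) for res in async_results])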
You can use sys.exit() to set the exit code of a process. The code can then be read from the exitcode attribute of the process:
import multiprocessing
import sys

def worker(procnum):
    print(str(procnum) + ' represent!')
    sys.exit(procnum)
if __name__ == '__main__':
jobs = []
for i in range(5):
p = multiprocessing.Process(target=worker, args=(i,))
jobs.append(p)
p.start()
result = []
for proc in jobs:
proc.join()
result.append(proc.exitcode)
    print(result)
Output:
0 represent!
1 represent!
2 represent!
3 represent!
4 represent!
[0, 1, 2, 3, 4]
The pebble package has a nice abstraction leveraging multiprocessing.Pipe which makes this quite straightforward:
from pebble import concurrent
@concurrent.process
def function(arg, kwarg=0):
return arg + kwarg
future = function(1, kwarg=1)
print(future.result())
Example from: https://pythonhosted.org/Pebble/#concurrent-decorators
Thought I'd simplify the simplest examples copied from above, working for me on Py3.6. Simplest is multiprocessing.Pool:
import multiprocessing
import time
def worker(x):
time.sleep(1)
return x
pool = multiprocessing.Pool()
print(pool.map(worker, range(10)))
You can set the number of processes in the pool with, e.g., Pool(processes=5). However, it defaults to the CPU count, so leave it blank for CPU-bound tasks. (I/O-bound tasks often suit threads anyway, as the threads are mostly waiting and can share a CPU core.) Pool also applies a chunking optimization.
(Note that the worker method cannot be nested within another method. I initially defined my worker method inside the method that makes the call to pool.map, to keep it all self-contained, but then the processes couldn't import it and threw "AttributeError: Can't pickle local object outer_method..inner_method". It can, however, be inside a class, as sketched below.)
(I appreciate that the original question specified printing 'represent!' rather than time.sleep(), but without the sleep I thought some code was running concurrently when it wasn't.)
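A rough sketch of that "inside a class" variant (the Jobs class name is made up; the point is only that the worker is defined at module level rather than nested inside another function, so the child processes can import it):
import multiprocessing
import time

class Jobs:
    @staticmethod
    def worker(x):
        # Same toy worker as above, just living inside a class.
        time.sleep(1)
        return x

if __name__ == '__main__':
    with multiprocessing.Pool(processes=5) as pool:
        print(pool.map(Jobs.worker, range(10)))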
Py3's ProcessPoolExecutor is also two lines (.map returns a generator so you need the list()):
from concurrent.futures import ProcessPoolExecutor
with ProcessPoolExecutor() as executor:
print(list(executor.map(worker, range(10))))
With plain Processes:
import multiprocessing
import time
def worker(x, queue):
time.sleep(1)
queue.put(x)
queue = multiprocessing.SimpleQueue()
tasks = range(10)
for task in tasks:
multiprocessing.Process(target=worker, args=(task, queue,)).start()
for _ in tasks:
print(queue.get())
Use SimpleQueue if all you need is put and get. The first loop starts all the processes, before the second makes the blocking queue.get calls. I don't think there's any reason to call p.join() too.
If you are using Python 3, you can use concurrent.futures.ProcessPoolExecutor as a convenient abstraction:
from concurrent.futures import ProcessPoolExecutor
def worker(procnum):
'''worker function'''
print(str(procnum) + ' represent!')
return procnum
if __name__ == '__main__':
with ProcessPoolExecutor() as executor:
print(list(executor.map(worker, range(5))))
Output:
0 represent!
1 represent!
2 represent!
3 represent!
4 represent!
[0, 1, 2, 3, 4]
A simple solution:
import multiprocessing
output=[]
data = range(0,10)
def f(x):
return x**2
def handler():
p = multiprocessing.Pool(64)
r=p.map(f, data)
return r
if __name__ == '__main__':
output.append(handler())
print(output[0])
Output:
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
You can use ProcessPoolExecutor to get a return value from a function as shown below:
from concurrent.futures import ProcessPoolExecutor
def test(num1, num2):
return num1 + num2
with ProcessPoolExecutor() as executor:
    future = executor.submit(test, 2, 3)
    print(future.result())  # 5
I modified vartec's answer a bit, since I needed to get the error codes from the functions. (Thanks vartec!!! It's an awesome trick.)
This can also be done with a manager.list, but I think it is better to have it in a dict and store a list within it. That way we keep the function and the results, since we can't be sure of the order in which the list will be populated.
from multiprocessing import Process
import time
import datetime
import multiprocessing
def func1(fn, m_list):
    print('func1: starting')
    time.sleep(1)
    m_list[fn] = "this is the first function"
    print('func1: finishing')
    # return "func1"  # no need for a return, since Process doesn't hand it back =(

def func2(fn, m_list):
    print('func2: starting')
    time.sleep(3)
    m_list[fn] = "this is function 2"
    print('func2: finishing')
    # return "func2"

def func3(fn, m_list):
    print('func3: starting')
    time.sleep(9)
    # If this raises, func3 never populates the dict and its process
    # exits with a non-zero exit code.
    raise ValueError("failed here")
    # Alternatively, catch the error to record something in the manager dict:
    # try:
    #     raise ValueError("failed here")
    #     m_list[fn] = "this is third"
    # except Exception:
    #     m_list[fn] = "this is third and it failed horribly"
    # print('func3: finishing')
    # return "func3"

def runInParallel(*fns):  # * accepts any number of functions
    start_time = datetime.datetime.now()
    proc = []
    manager = multiprocessing.Manager()
    m_list = manager.dict()
    for fn in fns:
        # print(fn)
        # print(dir(fn))
        p = Process(target=fn, name=fn.__name__, args=(fn, m_list))
        p.start()
        proc.append(p)
    for p in proc:
        p.join()
    print(datetime.datetime.now() - start_time)
    return m_list, proc

if __name__ == '__main__':
    manager, proc = runInParallel(func1, func2, func3)
    # print(dir(proc[0]))
    # print(proc[0]._name)
    # print(proc[0].name)
    # print(proc[0].exitcode)
    # here you can check which function failed
    for i in proc:
        print(i.name, i.exitcode)  # name was set in the Process(...) call above
    # only the functions that were able to populate the manager dict show up here
    for i, j in manager.items():
        print(dir(i))  # things you can do with the function object
        print(i, j)
I'm experimenting with the multiprocessing module and I'm getting some strange behavior. I have a list of tasks, which I first enqueue to a task_queue. Then I start all processes, which have access to both the task queue and the result queue. After the processes have started, I actively check for new content in the result queue while there are active processes.
The weird behavior is that when I start the script below, 4 of the processes exit immediately, and all the work is done by one process. No except Exception clauses are entered.
from multiprocessing import Process, Queue
from time import sleep, time
from queue import Empty
def function_doing_heavy_computation(task):
return task + 1
def service_function(func, tasks_q, result_q):
"""
:param func: user passed function that takes one argument - the task and returns the result of processing the task
:param tasks_q:
:param result_q:
"""
while True:
try:
task = tasks_q.get_nowait()
except Empty:
result_q.close() # https://docs.python.org/2/library/multiprocessing.html#multiprocessing.Queue.close
result_q.join_thread()
print("Worker finished ")
break
except Exception as ex:
print(str(ex))
try:
res = func(task)
except Exception as ex:
print(str(ex))
result_q.put_nowait(res)
def multiproc(func, all_tasks, num_procs):
result = []
task_queue, result_queue = Queue(), Queue()
# add the tasks to the task queue
start_put_tasks = time()
for x in all_tasks:
task_queue.put_nowait(x)
print("Finished adding tasks in %.2f" % (time() - start_put_tasks))
# create the processes and pass them the task and result queue
start_create_procs = time()
procs = []
for _ in range(num_procs):
p = Process(target=service_function, args=(func, task_queue, result_queue))
procs.append(p)
for p in procs:
p.start()
print("Started %i workers in %.2f" % (len(procs), time() - start_create_procs))
# collect the results in a list, and return it
start_drain_queue = time()
liveprocs = list(procs)
while liveprocs:
# drain the current contents of the result_queue
while True:
try:
result.append(result_queue.get_nowait())
except Empty:
break
# set the currently active procs. while loop will exit if all procs have terminated
liveprocs = [p for p in procs if p.is_alive()]
# no process has put a result/all ready results have been drained. wait for new results to arrive
sleep(.1)
print("Finished draining result queue in %.2f" % (time() - start_drain_queue))
if len(result) != len(all_tasks):
raise RuntimeError("Only %i/%i tasks processed" % (len(result), len(all_tasks)))
return result
if __name__ == '__main__':
start = time()
# a task is just a number here.
    # normally there are more tasks than workers
tasks = range(100000)
result = multiproc(func=function_doing_heavy_computation, all_tasks=tasks, num_procs=5)
print("Done in %s seconds" % str(time() - start))
assert len(tasks) == len(result)
print("Processed %i tasks" % len(result))
I'm using multiprocessing to create a sub-process of my Python app.
I would like to share data between the parent process and the child process.
It's important to mention that I need to share this asynchronously, meaning that both the child process and the parent process will update the data while the code is running.
What would be the best way to do that?
This is a simple example from the Python documentation:
from multiprocessing import Process, Queue
def f(q):
q.put([42, None, 'hello'])
if __name__ == '__main__':
q = Queue()
p = Process(target=f, args=(q,))
p.start()
    print(q.get())  # prints "[42, None, 'hello']"
p.join()
You can use a Pipe as well; a minimal sketch follows below.
For more details, refer to https://docs.python.org/2/library/multiprocessing.html
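Here is that Pipe sketch (my own example, not from the linked docs); both connection ends can send and receive, which matches the asynchronous two-way updates you describe, and the message strings are just placeholders:
from multiprocessing import Process, Pipe

def child(conn):
    conn.send('hello from child')            # child -> parent
    print('child received:', conn.recv())    # parent -> child
    conn.close()

if __name__ == '__main__':
    parent_conn, child_conn = Pipe()  # duplex by default
    p = Process(target=child, args=(child_conn,))
    p.start()
    print('parent received:', parent_conn.recv())
    parent_conn.send('hello from parent')
    p.join()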
Here's an example that combines multiprocessing and multithreading and shares a couple of variables:
import time
from multiprocessing import Process, Value
from ctypes import c_bool
from threading import Thread

ps = []

def yourFunc(pause, budget):
    while True:
        print(budget.value, pause.value)
        # set a shared value
        pause.value = True
        # ...

def multiProcess(threads, pause, budget):
    ts = []
    for _ in range(threads):
        t = Thread(target=yourFunc, args=(pause, budget,))
        t.start()
        ts.append(t)
    time.sleep(3)

if __name__ == '__main__':
    pause = Value(c_bool, False)
    budget = Value('i', 5000)
    for i in range(2):
        p = Process(target=multiProcess, args=(2, pause, budget))
        p.start()
        ps.append(p)