Subprocesses are not executed when using multiprocessing in Python - python

I'm now testing the code below to calculate an array in parallel, but it seems that the long-running function is never executed. My Python version: 3.7.4, Operating system: Windows 10.
from multiprocessing import Pool, Lock, Array
import os, time
def long_time_task(i,array,lock):
    # Worker task: burns CPU with a counting loop, then stores the per-task
    # total in slot i of the shared array under the lock.
    # NOTE(review): in the question's setup this function never runs — per
    # the answer below, a plain multiprocessing Array/Lock cannot be pickled
    # for Pool workers, and apply_async hides that error because its
    # AsyncResult is never inspected.
    print('Run task %s (%s)...' % (i, os.getpid()))
    start = time.time()
    total_count = 0
    # Deliberately slow busy-loop (~5e7 increments) to make parallelism visible.
    for k in range(5*10**7): total_count += 1
    total_count += i
    lock.acquire()
    array[i] = total_count
    lock.release()
    end = time.time()
    print('Task %s runs %0.2f seconds.' % (i, (end - start)))
def mainFunc():
    # Driver: creates a Pool, a shared Array ('f' = C float, 20 slots) and a
    # Lock, and submits 20 tasks.
    print('Parent process %s.' % os.getpid())
    p = Pool()
    array = Array('f', 20)
    lock = Lock()
    # The AsyncResult returned by apply_async is discarded, so any error that
    # occurs while dispatching/running the task is silently swallowed — this
    # is why the tasks appear to never execute.
    for i in range(20): p.apply_async(long_time_task, args=(i,array,lock))
    print('Waiting for all subprocesses done...')
    p.close()
    p.join()
    print('All subprocesses done.')

if __name__ == '__main__':
    mainFunc()

There are a few issues with your code:
The apply_async method returns an AsyncResult object whose completion you need to wait for (e.g. by calling its get() method).
You can't pass normal multiprocessing Array or Lock objects to Pool methods, because they can't be pickled. You can use a Manager object instead.
Try this:
from multiprocessing import Pool, Lock, Array, Manager
import os, time
def long_time_task(i,array,lock):
    # Same worker as in the question: CPU-bound counting loop, then writes
    # the result into slot i of the (now Manager-proxied) array under the
    # (now Manager-proxied) lock.
    print('Run task %s (%s)...' % (i, os.getpid()))
    start = time.time()
    total_count = 0
    for k in range(5*10**7): total_count += 1
    total_count += i
    lock.acquire()
    array[i] = total_count
    lock.release()
    end = time.time()
    print('Task %s runs %0.2f seconds.' % (i, (end - start)))
def mainFunc():
    # Fixed driver: shares state via Manager proxies (which ARE picklable)
    # and keeps the AsyncResult objects so results/errors are retrieved.
    print('Parent process %s.' % os.getpid())
    p = Pool()
    m = Manager()
    array = m.Array('f', [0] * 20)
    lock = m.Lock()
    results = [p.apply_async(long_time_task, args=(i,array,lock)) for i in range(20)]
    # get() blocks until each task finishes and re-raises any worker error.
    [result.get() for result in results]
    print('Waiting for all subprocesses done...')
    p.close()
    p.join()
    print('All subprocesses done.')

if __name__ == '__main__':
    mainFunc()
You could also simplify it like this but I'm not sure if this will work in your case:
import array
import os
import time
from multiprocessing import Pool
def long_time_task(i):
    # Simplified worker: no shared state or locking; the computed total is
    # returned to the parent via Pool.map instead.
    print(f'Run task {i} ({os.getpid()})...')
    start = time.time()
    total_count = 0
    for k in range(5 * 10 ** 7):
        total_count += 1
    total_count += i
    end = time.time()
    print(f'Task {i} runs {end - start:.2f} seconds.')
    return total_count
def main():
    # Collects results via Pool.map and writes them into a local
    # array.array ('d' = C double) — no cross-process sharing needed.
    print('Parent process %s.' % os.getpid())
    a = array.array('d', range(20))
    r = range(20)
    with Pool() as pool:
        # Pool.map preserves input order, so idx lines up with each result.
        for idx, result in zip(r, pool.map(long_time_task, r)):
            a[idx] = result
    print(a)
    print(f'All subprocesses done.')

if __name__ == '__main__':
    main()

Related

Kill threads after one thread successfully finished function [duplicate]

This question already has answers here:
Is there any way to kill a Thread?
(31 answers)
Closed 12 months ago.
I search a way to transform this kind of code from multiprocessing into multithreading:
import multiprocessing
import random
import time
FIND = 50             # target value a worker tries to land on exactly
MAX_COUNT = 100000    # upper bound for one sweep
INTERVAL = range(10)  # random step sizes (0-9)
queue = multiprocessing.Queue(maxsize=1)  # first successful worker posts here

def find(process, initial):
    # Random walk from `initial` towards MAX_COUNT; reports via the
    # module-level queue if it lands on FIND exactly, then restarts the sweep.
    succ = False
    # NOTE(review): succ is never set to True, so this loops until the
    # process is terminated from outside — that is the question's premise.
    while succ == False:
        start=initial
        while(start <= MAX_COUNT):
            if(FIND == start):
                queue.put(f"Found: {process}, start: {initial}")
                break;
            i = random.choice(INTERVAL)
            start = start + i
            print(process, start)
processes = []
manager = multiprocessing.Manager()  # NOTE(review): created but never used
for i in range(5):
    process = multiprocessing.Process(target=find, args=(f'computer_{i}', i))
    processes.append(process)
    process.start()
# Block until the first worker reports success, then kill all workers —
# terminate() is what has no thread equivalent (the point of the question).
ret = queue.get()
for i in range(5):
    process = processes[i]
    process.terminate()
    print(f'terminated {i}')
print(ret)
The way it works is it starts multiple processes and after the first process finished the function find isn't needed anymore. I tried to transform it in that way, but unfortunately the terminate function is not usable:
import _thread as thread
import queue
import random
import time
FIND = 50             # target value a worker tries to land on exactly
MAX_COUNT = 100000    # upper bound for one sweep
INTERVAL = range(10)  # random step sizes (0-9)
qu = queue.Queue(maxsize=1)  # thread-safe queue shared with all workers

def find(process, initial):
    # Threaded copy of the multiprocessing version: random walk from
    # `initial`; posts to the queue on hitting FIND exactly.
    succ = False
    # succ never becomes True — the thread runs until killed from outside.
    while succ == False:
        start=initial
        while(start <= MAX_COUNT):
            if(FIND == start):
                qu.put(f"Found: {process}, start: {initial}")
                break;
            i = random.choice(INTERVAL)
            start = start + i
            print(process, start)
threads = []
for i in range(5):
    # start_new_thread returns a plain integer thread identifier...
    th = thread.start_new_thread(find, (f'computer_{i}', i))
    threads.append(th)
ret = qu.get()
for i in range(5):
    th = threads[i]
    # ...so this fails: an int has no terminate() method, and Python threads
    # cannot be terminated the way processes can — hence the question.
    th.terminate()
    print(f'terminated {i}')
print(ret)
How can I get some termination of threads?
Try:
# Forcibly raise SystemExit in every running thread.
import ctypes
import threading

# PyThreadState_SetAsyncExc schedules an exception to be raised in the target
# thread the next time it executes Python bytecode. Caveats:
#  - threading._active is a private id -> Thread map (no public equivalent);
#  - a thread blocked inside a C call (e.g. queue.get()) only dies once it
#    returns to the interpreter loop.
# The original snippet used `types.pythonapi`, which does not exist — the
# C-API entry point lives on `ctypes.pythonapi`. It also shadowed the `id`
# builtin and the `thread`/`threading` names.
for thread_id in list(threading._active):
    ctypes.pythonapi.PyThreadState_SetAsyncExc(
        ctypes.c_ulong(thread_id), ctypes.py_object(SystemExit)
    )

Python Concurrent.Futures ProcessPool Executor wrong output

I am trying to use the ProcessPoolExecutor() to run some functions but I can't manage to understand how to get the return values of the functions out of the with block.
def threaded_upload(i):
    # Dummy task: sleeps 2 s then returns a one-element list derived from i
    # (0 -> [10], 2 -> [4], anything else -> [-99]).
    time.sleep(2)
    if i == 0:
        k = 10
    elif i == 2:
        k = i*i
    else:
        k = -99
    return [k]
def controller():
    # NOTE(review): the __name__ guard belongs around the print(controller())
    # call at module level, not inside controller. Worker processes re-import
    # this module with __name__ != "__main__", so controller() falls through
    # the guard and returns None — presumably the stray "None" in the
    # question's output; verify on the asker's platform.
    if __name__ == "__main__":
        futures = []
        with ProcessPoolExecutor() as pool:
            for paso in range(4):
                futuro_i = pool.submit(threaded_upload,paso)
                # result() blocks right after each submit, serializing the
                # pool — the tasks never actually run in parallel here.
                wth=[futuro_i.result()]
                futures.append(futuro_i)
        # By this point every future is already finished (see above), so both
        # wait() calls return immediately.
        wait(futures, return_when=ALL_COMPLETED)
        merged_list = []
        for future in futures:
            for valor in future.result():
                merged_list.append(valor)
        Lista_Final = merged_list
        wait(futures, return_when=ALL_COMPLETED)
        return Lista_Final

print(controller())
The output of the code is:
None
[10, -99, 4, -99]
I am not sure why?
The "wait" doesn't seem to wait until all functions are executed either.
To be honest, I have been reading and reading for a few days but the descriptions of concurrent.futures and multiprocessing are more advanced than my current knowledge.
Any clarification will be appreciated.
Thanks in advance.
You first submit the jobs and then wait for the results. You can also return an integer instead of a list and then skip the inner loop:
test.py:
import random
import time
from concurrent.futures import ProcessPoolExecutor, wait
def worker(i):
    # Simulates work with a random 1-5 s sleep, then returns a value derived
    # from i (0 -> 10, 2 -> 4, otherwise -99).
    t = random.uniform(1, 5)
    print(f"START: {i} ({t:.3f}s)")
    time.sleep(t)
    if i == 0:
        k = 10
    elif i == 2:
        k = i * i
    else:
        k = -99
    print(f"END: {i}")
    return k
def main():
    # Submit everything first, then wait — this is what lets the four tasks
    # run in parallel instead of being serialized by an immediate result().
    futures = []
    with ProcessPoolExecutor() as pool:
        for i in range(4):
            future = pool.submit(worker, i)
            futures.append(future)
    results = []
    # wait() returns (done, pending) as sets, so iteration order over `done`
    # is arbitrary, not submission order.
    done, pending = wait(futures)  # ALL_COMPLETED is the default value
    for future in done:
        results.append(future.result())
    print(results)

if __name__ == "__main__":
    main()
Test:
$ python test.py
START: 0 (1.608s)
START: 1 (1.718s)
START: 2 (1.545s)
START: 3 (1.588s)
END: 2
END: 3
END: 0
END: 1
[10, -99, 4, -99]

How to stop multiprocessing.Pool.map on exception

When I raise an exception inside my thread_function, it doesn't stop the rest of the map processing. I'd like to stop it.
def thread_function(n):
    """Worker that deliberately blows up when handed the value 10."""
    if n != 10:
        return None
    raise Exception('Stop everything!')
pool = Pool(processes = 4)
# The exception from n == 10 is re-raised by map() in the parent, but the
# remaining chunks keep being dispatched meanwhile — the question asks how
# to stop them early.
pool.map(thread_function, range(1, 1000), chunksize = 1)
I'd expect no more processing after one thread reached n == 10.
I don't know of a way to do this directly with map but you can monitor an async_map like this...
from multiprocessing import Pool
import time
def thread_function(n):
    """Print n and pause briefly; deliberately fail when n == 10."""
    if n != 10:
        print(n)
        time.sleep(0.1)
        return
    print('Raising Exception')
    raise Exception('Stop everything!')
pool = Pool(processes = 4)
# map_async returns immediately; poll the AsyncResult so we can bail out
# as soon as any task has failed, instead of letting the whole range finish.
result = pool.map_async(thread_function, range(1, 1000), chunksize = 1)
while not result.ready():
    # NOTE(review): _success is a private AsyncResult attribute — there is
    # no public "failed so far" API, hence this workaround.
    if not result._success:
        print('Exiting for failure')
        pool.terminate()
        pool.join()
        break

What is the time difference between a normal python code and the same code in multiprocessing?

I'm trying to clearly understand the difference between a function in a single process and the same function on multiple cores. The following normal Python code and multiprocessing code give the same time (approx). Am I using multiprocessing wrong?
Normal Python code:
import time
def basic_func(x):
    """Classify x as 'zero', 'even', or 'odd'."""
    if x == 0:
        return 'zero'
    return 'even' if x % 2 == 0 else 'odd'
def multiprocessing_func(x):
    """Square x and print which class (zero/even/odd) the square falls in."""
    squared = x * x
    print('{} squared results in a/an {} number'.format(x, basic_func(squared)))
if __name__ == '__main__':
    # Sequential baseline: one process, 1000 calls timed end to end.
    starttime = time.time()
    for each in range(0, 1000):
        multiprocessing_func(each)
    print('That took {} seconds'.format(time.time() - starttime))
Multiprocessing code:
import time
import multiprocessing
def basic_func(x):
    """Return 'zero' for 0, 'even' for other even numbers, 'odd' otherwise."""
    if x == 0:
        return 'zero'
    return 'odd' if x % 2 else 'even'
def multiprocessing_func(x):
    """Report which class (zero/even/odd) the square of x belongs to."""
    y = basic_func(x * x)
    print('{} squared results in a/an {} number'.format(x, y))
if __name__ == '__main__':
    # Parallel version: Pool.map fans the same 1000 calls out to workers.
    # With work this cheap, pool start-up cost dominates the timing.
    starttime = time.time()
    pool = multiprocessing.Pool()
    pool.map(multiprocessing_func, range(0, 1000))
    pool.close()
    print('That took {} seconds'.format(time.time() - starttime))
Thanks in advance !
code source : This tutorial
Without multiprocessing, I executed this code in 0.07s. The multiprocessing version took 0.28s. Create some pool of processes take some times and it may not be worth it.
I recommend not printing during the process as it could create a funnel effect (I/O is always an issue for concurrent processes)
Changing a little bit your code :
import time
import multiprocessing
def basic_func(x):
    """Classify x: 'zero' if it is 0, otherwise 'even' or 'odd' by parity."""
    if x != 0:
        return 'even' if x % 2 == 0 else 'odd'
    return 'zero'
def multiprocessing_func(x):
    """Return the classification ('zero'/'even'/'odd') of x squared."""
    return basic_func(x * x)
And comparing results :
# Sequential benchmark: 10**8 calls in the parent process (took ~34 s).
starttime = time.time()
for each in range(0, 100000000):
    multiprocessing_func(each)
print('That took {} seconds'.format(time.time() - starttime))
Took 34s
# Parallel benchmark: same workload spread over 10 worker processes
# (took ~9.6 s) — with enough work per call, the pool overhead pays off.
starttime = time.time()
pool = multiprocessing.Pool(processes=10)
pool.map(multiprocessing_func, range(0, 100000000))
pool.close()
print('That took {} seconds'.format(time.time() - starttime))
Took 9.6s
See that the "same" problem had drastic different results. Answering your question is not possible, it depends too much on the initial problem, funnel effects and the balance between the duration of the task and the cost of creating pool of processes.

python Infinite Loop append in multiprocessing.manger list

Why isn't this code working:
from multiprocessing import Process, Manager
import pcap, string, sys
def f(a, p):
    # (Python 2) Capture loop: appends every packet from pcap object `p`
    # to the shared manager list `a`, forever, until Ctrl-C.
    try:
        while True:
            a.append(p.next())
    except KeyboardInterrupt:
        print 'stop'
def g(a):
    # (Python 2) Printer loop: repeatedly prints the newest element of the
    # shared list. NOTE(review): raises IndexError while the list is empty.
    # print a
    while True:
        print a[len(a)-1]
if __name__ == '__main__':
    # num = Value('d', 0.0)
    manager = Manager()
    l = manager.list([])
    p = pcap.pcapObject()
    dev = sys.argv[1]
    p.open_live(dev, 65000, 0, 100)
    p.setfilter(string.join(sys.argv[2:], ' '), 0, 0)
    # NOTE(review): `p` (the pcap object) is rebound to the Process here.
    p = Process(target=f, args=(l, p))
    p.start()
    # f() loops forever (`while True`), so this join() never returns and
    # g() below is never started — that is why this version "isn't working".
    p.join()
    a = Process(target=g, args=(l,))
    a.start()
    a.join()
    # print l
    # print len(l)
while the code below works fine:
from multiprocessing import Process, Manager
import pcap, string, sys
def f(a, p):
    # (Python 2) Bounded capture: stops after 100 packets, so the child
    # process exits and the parent's p.join() can return — this is the only
    # difference from the non-working version.
    try:
        while len(a) < 100:
            a.append(p.next())
    except KeyboardInterrupt:
        print 'stop'
def g(a):
    # (Python 2) Printer loop: repeatedly prints the newest element of the
    # shared list. NOTE(review): raises IndexError while the list is empty.
    # print a
    while True:
        print a[len(a)-1]
if __name__ == '__main__':
    # num = Value('d', 0.0)
    manager = Manager()
    l = manager.list([])
    p = pcap.pcapObject()
    dev = sys.argv[1]
    p.open_live(dev, 65000, 0, 100)
    p.setfilter(string.join(sys.argv[2:], ' '), 0, 0)
    p = Process(target=f, args=(l, p))
    p.start()
    # Returns once f() has collected 100 packets, so g() actually runs here.
    p.join()
    a = Process(target=g, args=(l,))
    a.start()
    a.join()
    # print l
    # print len(l)
Other Question:
Is this the best and fastest/most optimized way to create shared
memory between different processes?
Is the multiprocessing Manager class only for finite-size data structures?
If not, what am I doing wrong?
Any help/hint would be appreciated. Thanks in advance.

Categories

Resources