Python concurrent.futures ProcessPoolExecutor wrong output

I am trying to use ProcessPoolExecutor() to run some functions, but I can't figure out how to get the return values of the functions out of the with block.
import time
from concurrent.futures import ProcessPoolExecutor, wait, ALL_COMPLETED

def threaded_upload(i):
    time.sleep(2)
    if i == 0:
        k = 10
    elif i == 2:
        k = i*i
    else:
        k = -99
    return [k]

def controller():
    if __name__ == "__main__":
        futures = []
        with ProcessPoolExecutor() as pool:
            for paso in range(4):
                futuro_i = pool.submit(threaded_upload, paso)
                wth = [futuro_i.result()]
                futures.append(futuro_i)
            wait(futures, return_when=ALL_COMPLETED)
            merged_list = []
            for future in futures:
                for valor in future.result():
                    merged_list.append(valor)
            Lista_Final = merged_list
            wait(futures, return_when=ALL_COMPLETED)
            return Lista_Final

print(controller())
The output of the code is:
None
[10, -99, 4, -99]
I am not sure why.
The "wait" doesn't seem to wait until all functions are executed either.
To be honest, I have been reading for a few days, but the documentation for concurrent.futures and multiprocessing is more advanced than my current knowledge.
Any clarification will be appreciated.
Thanks in advance.

You first submit the jobs and then wait for the results. You can also return an integer instead of a list and then skip the inner loop:
test.py:
import random
import time
from concurrent.futures import ProcessPoolExecutor, wait

def worker(i):
    t = random.uniform(1, 5)
    print(f"START: {i} ({t:.3f}s)")
    time.sleep(t)
    if i == 0:
        k = 10
    elif i == 2:
        k = i * i
    else:
        k = -99
    print(f"END: {i}")
    return k

def main():
    futures = []
    with ProcessPoolExecutor() as pool:
        for i in range(4):
            future = pool.submit(worker, i)
            futures.append(future)
    results = []
    done, pending = wait(futures)  # ALL_COMPLETED is the default value
    for future in done:
        results.append(future.result())
    print(results)

if __name__ == "__main__":
    main()
Test:
$ python test.py
START: 0 (1.608s)
START: 1 (1.718s)
START: 2 (1.545s)
START: 3 (1.588s)
END: 2
END: 3
END: 0
END: 1
[10, -99, 4, -99]
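One caveat worth noting (my addition, not part of the original answer): wait() returns the done futures as a set, so the order in which you read their results is not guaranteed to match submission order; it merely happened to match here. If you need results in input order, Executor.map is simpler. A minimal sketch of main() rewritten that way, reusing worker() from above:

def main():
    with ProcessPoolExecutor() as pool:
        # map yields results in the same order as the inputs
        results = list(pool.map(worker, range(4)))
    print(results)  # e.g. [10, -99, 4, -99]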

Related

Create Python code that counts prime numbers in range(3000000) using multithreading over a for loop (e.g. if Nthreads is N, it divides 3000000 by N)

I'm trying to run this program but it is showing me "Thread 0 has 0 prime numbers" in the console followed by "Killed" after 5 minutes. Moreover, it is very slow. Please help me develop and correct this code.
import threading
import time

Nthreads = 4
maxNumber = 3000000
starting_range = 0
ending_range = 0
division = 0
lst = []

def prime(x, y):
    prime_list = []
    for i in range(x, y):
        if i == 0 or i == 1:
            continue
        else:
            for j in range(2, int(i/2)+1):
                if i % j == 0:
                    break
            else:
                prime_list.append(i)
    return prime_list

def func_thread(x, y):
    out.append(prime(x, y))

thread_list = []
results = len(lst)
for i in range(Nthreads):
    devision = maxNumber//Nthreads
    starting_range = (i-1)*division+1
    ending_range = i*devision
    lst = prime(starting_range, ending_range)
    print(" Thread ", i, " has ", len(lst), " prime numbers.")
    thread = threading.Thread(target=func_thread, args=(i, results))
    thread_list.append(thread)
for thread in thread_list:
    thread.start()
for thread in thread_list:
    thread.join()
In Python, if you use multithreading for CPU-bound tasks, it will be slower than if you don't use multithreading. You need to use multiprocessing for this problem. You can read this article for more information: https://www.geeksforgeeks.org/difference-between-multithreading-vs-multiprocessing-in-python/
Multithreading is wholly inappropriate for CPU-intensive work such as this. However, it can be done:
from concurrent.futures import ThreadPoolExecutor

NTHREADS = 4
MAXNUMBER = 3_000_000
CHUNK = MAXNUMBER // NTHREADS
assert MAXNUMBER % NTHREADS == 0
RANGES = [(base, base+CHUNK) for base in range(0, MAXNUMBER, CHUNK)]

all_primes = []

def isprime(n):
    if n <= 3:
        return n > 1
    if not n % 2 or not n % 3:
        return False
    for i in range(5, int(n**0.5)+1, 6):
        if not n % i or not n % (i + 2):
            return False
    return True

def process(_range):
    lo, hi = _range
    if lo < 3:
        all_primes.append(2)
        lo = 3
    elif lo % 2 == 0:
        lo += 1
    for p in range(lo, hi, 2):
        if isprime(p):
            all_primes.append(p)

with ThreadPoolExecutor() as executor:
    executor.map(process, RANGES)
The all_primes list is unordered. Note also that this strategy will only work if MAXNUMBER is exactly divisible by NTHREADS.
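If MAXNUMBER is not an exact multiple of NTHREADS, one possible workaround (my addition, not from the original answer) is to let the last chunk absorb the remainder:

CHUNK = MAXNUMBER // NTHREADS
# the final range runs up to MAXNUMBER, so no numbers are dropped
RANGES = [(i * CHUNK, (i + 1) * CHUNK if i < NTHREADS - 1 else MAXNUMBER)
          for i in range(NTHREADS)]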
Note on performance:
This takes 7.88s on my machine. A multiprocessing variant takes 2.90s
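For reference, here is a minimal sketch of what such a multiprocessing variant could look like (my own illustration, not the code that was actually timed). It reuses isprime() and RANGES from the block above, has each process return its own list of primes instead of appending to a shared one, and merges the results in the parent:

from concurrent.futures import ProcessPoolExecutor

def process_chunk(_range):
    lo, hi = _range
    primes = []              # local to the worker process, returned to the parent
    if lo < 3:
        primes.append(2)
        lo = 3
    elif lo % 2 == 0:
        lo += 1
    for p in range(lo, hi, 2):
        if isprime(p):
            primes.append(p)
    return primes

if __name__ == "__main__":
    with ProcessPoolExecutor() as executor:
        all_primes = [p for chunk in executor.map(process_chunk, RANGES) for p in chunk]
    print(len(all_primes))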

Kill threads after one thread successfully finishes its function [duplicate]

This question already has answers here:
Is there any way to kill a Thread?
(31 answers)
Closed 12 months ago.
I'm looking for a way to transform this kind of code from multiprocessing to multithreading:
import multiprocessing
import random
import time

FIND = 50
MAX_COUNT = 100000
INTERVAL = range(10)

queue = multiprocessing.Queue(maxsize=1)

def find(process, initial):
    succ = False
    while succ == False:
        start = initial
        while(start <= MAX_COUNT):
            if(FIND == start):
                queue.put(f"Found: {process}, start: {initial}")
                break;
            i = random.choice(INTERVAL)
            start = start + i
            print(process, start)

processes = []
manager = multiprocessing.Manager()

for i in range(5):
    process = multiprocessing.Process(target=find, args=(f'computer_{i}', i))
    processes.append(process)
    process.start()

ret = queue.get()
for i in range(5):
    process = processes[i]
    process.terminate()
    print(f'terminated {i}')
print(ret)
The way it works is that it starts multiple processes, and once the first process has finished, the function find isn't needed anymore. I tried to transform it along those lines, but unfortunately threads have no terminate function:
import _thread as thread
import queue
import random
import time

FIND = 50
MAX_COUNT = 100000
INTERVAL = range(10)

qu = queue.Queue(maxsize=1)

def find(process, initial):
    succ = False
    while succ == False:
        start = initial
        while(start <= MAX_COUNT):
            if(FIND == start):
                qu.put(f"Found: {process}, start: {initial}")
                break;
            i = random.choice(INTERVAL)
            start = start + i
            print(process, start)

threads = []
for i in range(5):
    th = thread.start_new_thread(find, (f'computer_{i}', i))
    threads.append(th)

ret = qu.get()
for i in range(5):
    th = threads[i]
    th.terminate()
    print(f'terminated {i}')
print(ret)
How can I terminate the threads?
Try:
import ctypes
import threading

for id, thread in threading._active.items():
    ctypes.pythonapi.PyThreadState_SetAsyncExc(id, ctypes.py_object(SystemExit))
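Note that raising SystemExit asynchronously via ctypes relies on a CPython implementation detail and cannot interrupt a thread that is blocked in C code. A gentler alternative (my own sketch, not part of the original answer) is cooperative cancellation: every worker checks a shared threading.Event and exits on its own once one of them has found the target.

import queue
import random
import threading

FIND = 50
MAX_COUNT = 100000
INTERVAL = range(10)

qu = queue.Queue(maxsize=1)
stop = threading.Event()

def find(name, initial):
    while not stop.is_set():
        start = initial
        while start <= MAX_COUNT and not stop.is_set():
            if start == FIND:
                qu.put(f"Found: {name}, start: {initial}")
                stop.set()        # signal the other threads to stop
                return
            start += random.choice(INTERVAL)

threads = [threading.Thread(target=find, args=(f'computer_{i}', i)) for i in range(5)]
for th in threads:
    th.start()
ret = qu.get()
for th in threads:
    th.join()
print(ret)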

Different result after using multiprocessing

Hi guys,
I am new to Python multiprocessing. Recently my research has required calculations with many iterations, so I tried to use multiprocessing to speed them up. But when I wrote a small sample code, I found that the curve I got with multiprocessing is different from the one without multiprocessing.
The code with multiprocessing:
import random
import matplotlib.pyplot as plt
import math
import numpy as np
import multiprocessing as mp
class Classic:
def __init__(self,position,type):
assert type == 'A' or type == 'B'
self.position = position
self.type = type
def getposition(self):
return self.position
def gettype (self):
return self.type
def setposition(self,pos):
self.position = pos
def settype (self,t):
self.type = t
def number_count(system):
counter = 0
for i in range(0,len(system)):
if system[i] !=0:
counter=counter+1
return counter
def time_evolution(system_temp,steps):
numberlist=np.zeros(steps)
number = number_count(system_temp)
for t in range(0,steps):
for i in range(0,len(system_temp)):
x = random.randint(0, len(system_temp)-2)
if system_temp[x]!=0 and system_temp[x+1]!=0:
p1 = system_temp[x]
p2 = system_temp[x+1]
p1_type = p1.gettype()
p2_type = p2.gettype()
exchange_check = random.randint(0,1)
if p1_type == p2_type:
system_temp[x]=0
system_temp[x+1]=0
number = number-2
elif exchange_check == 1:
type_temp = p1_type
p1.settype(p2_type)
p2.settype(type_temp)
elif system_temp[x]!=0 and system_temp[x+1]==0:
system_temp[x+1] = system_temp[x]
system_temp[x] =0
elif system_temp[x]==0 and system_temp[x+1]!=0:
system_temp[x]=system_temp[x+1]
system_temp[x+1]=0
numberlist[t]=numberlist[t]+number
return numberlist
if __name__ =='__main__':
pool = mp.Pool(8)
size = 10000
system_init = [0]*size
particle_num = 3000
repeat = 20
steps = 2000
res=[]
totalnum= np.zeros(steps)
randomlist = random.sample(range(1,100*repeat),repeat)
for i in range(0,particle_num):
pos = random.randint(0,size-1)
ran_num = random.randint (0,1)
if ran_num == 0:
temp_type = 'A'
else:
temp_type = 'B'
if system_init[pos] ==0:
system_init[pos] = Classic(pos,temp_type)
for k in range(0, repeat):
system_temp = system_init[:]
random.seed(randomlist[k])
res.append(pool.apply_async(time_evolution, args=(system_temp,steps,)))
pool.close()
pool.join()
for count in range(0,len(res)):
totalnum =totalnum+ np.array(res[count].get())
time=np.linspace(1,steps+1,steps)
time_sqrt=np.sqrt(8.0*math.pi*time)
density =totalnum/(repeat*size)
density_mod = np.multiply(time_sqrt,density)
#plt.loglog(time,density_mod)
#plt.savefig("modified_density_loglog.pdf")
#plt.close()
myfile=open('density_mod2.txt','w')
for element in density_mod:
myfile.write(str(element))
myfile.write('\n')
myfile.close()
And the code without multiprocessing is
import random
import matplotlib.pyplot as plt
import math
import numpy as np

class Classic:
    def __init__(self, position, type):
        assert type == 'A' or type == 'B'
        self.position = position
        self.type = type

    def getposition(self):
        return self.position

    def gettype(self):
        return self.type

    def setposition(self, pos):
        self.position = pos

    def settype(self, t):
        self.type = t

def number_count(system):
    counter = 0
    for i in range(0, len(system)):
        if system[i] != 0:
            counter = counter + 1
    return counter

def time_evolution(system_temp, steps):
    numberlist = np.zeros(steps)
    number = number_count(system_temp)
    for t in range(0, steps):
        for i in range(0, len(system_temp)):
            x = random.randint(0, len(system_temp)-2)
            if system_temp[x] != 0 and system_temp[x+1] != 0:
                p1 = system_temp[x]
                p2 = system_temp[x+1]
                p1_type = p1.gettype()
                p2_type = p2.gettype()
                exchange_check = random.randint(0, 1)
                if p1_type == p2_type:
                    system_temp[x] = 0
                    system_temp[x+1] = 0
                    number = number - 2
                elif exchange_check == 1:
                    type_temp = p1_type
                    p1.settype(p2_type)
                    p2.settype(type_temp)
            elif system_temp[x] != 0 and system_temp[x+1] == 0:
                system_temp[x+1] = system_temp[x]
                system_temp[x] = 0
            elif system_temp[x] == 0 and system_temp[x+1] != 0:
                system_temp[x] = system_temp[x+1]
                system_temp[x+1] = 0
        numberlist[t] = numberlist[t] + number
    return numberlist

size = 10000
system_init = [0]*size
particle_num = 3000
repeat = 20
steps = 2000
res = []
totalnum = np.zeros(steps)
randomlist = random.sample(range(1, 100*repeat), repeat)
for i in range(0, particle_num):
    pos = random.randint(0, size-1)
    ran_num = random.randint(0, 1)
    if ran_num == 0:
        temp_type = 'A'
    else:
        temp_type = 'B'
    if system_init[pos] == 0:
        system_init[pos] = Classic(pos, temp_type)
for k in range(0, repeat):
    system_temp = system_init[:]
    random.seed(randomlist[k])
    res.append(time_evolution(system_temp, steps))
for count in range(0, len(res)):
    totalnum += res[count]
time = np.linspace(1, steps+1, steps)
time_sqrt = np.sqrt(8.0*math.pi*time)
density = totalnum/(repeat*size)
density_mod = np.multiply(time_sqrt, density)
myfile = open('density_mod3.txt', 'w')
for element in density_mod:
    myfile.write(str(element))
    myfile.write('\n')
myfile.close()
And the result is shown below:
[plot: blue curve = with multiprocessing, orange curve = without multiprocessing]
The blue curve is the result with multiprocessing and the orange one is the result without it. I am not sure why this happens. How can I fix it?
My guess is that you don't initialize the random number generator correctly. You have to do that "inside" the spawned processes.
Check the following simple example:
import random
import multiprocessing as mp

def rand_test_1():
    print(random.randint(0, 100))
    return None

def rand_test_2(seed):
    random.seed(seed)
    print(random.randint(0, 100))
    return None

if __name__ == '__main__':
    repeat = 3
    randomlist = random.sample(range(1, 100 * repeat), repeat)

    print('Classic:')
    for k in range(repeat):
        random.seed(randomlist[k])
        rand_test_1()

    print('\nMultiprocessing version 1:')
    with mp.Pool() as pool:
        for k in range(repeat):
            random.seed(randomlist[k])
            pool.apply_async(rand_test_1, args=tuple())
        pool.close()
        pool.join()

    print('\nMultiprocessing version 2:')
    with mp.Pool() as pool:
        for k in range(repeat):
            pool.apply_async(rand_test_2, args=(randomlist[k],))
        pool.close()
        pool.join()
The results look like:
Classic:
32
78
6
Multiprocessing version 1:
84
43
90
Multiprocessing version 2:
32
78
6
You are using multiprocessing version 1; I think you should use version 2.
One other point which has nothing to do with your problem: My impression is that it might be a good idea to use .map/.starmap (see here) instead of .apply_async:
...
with mp.Pool() as pool:
    res = list(pool.map(rand_test_2, randomlist))
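Applied to the code from the question, that could look roughly like the following sketch (my own illustration; time_evolution, system_init, randomlist, repeat and steps are the names from the question, and time_evolution_seeded is a hypothetical wrapper that seeds the generator inside the worker process):

def time_evolution_seeded(seed, system_temp, steps):
    random.seed(seed)             # seeding happens in the worker process
    return time_evolution(system_temp, steps)

if __name__ == '__main__':
    with mp.Pool(8) as pool:
        args = [(randomlist[k], system_init[:], steps) for k in range(repeat)]
        res = pool.starmap(time_evolution_seeded, args)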

Subprocesses are not executed when using multiprocessing in Python

I'm now testing the code below to calculate an array in parallel, but it seems that the long-running function is never executed. My Python version: 3.7.4, operating system: Windows 10.
from multiprocessing import Pool, Lock, Array
import os, time

def long_time_task(i, array, lock):
    print('Run task %s (%s)...' % (i, os.getpid()))
    start = time.time()
    total_count = 0
    for k in range(5*10**7): total_count += 1
    total_count += i
    lock.acquire()
    array[i] = total_count
    lock.release()
    end = time.time()
    print('Task %s runs %0.2f seconds.' % (i, (end - start)))

def mainFunc():
    print('Parent process %s.' % os.getpid())
    p = Pool()
    array = Array('f', 20)
    lock = Lock()
    for i in range(20): p.apply_async(long_time_task, args=(i, array, lock))
    print('Waiting for all subprocesses done...')
    p.close()
    p.join()
    print('All subprocesses done.')

if __name__ == '__main__':
    mainFunc()
There are a few issues with your code:
The apply_async method returns a result object that you need to wait on (for example by calling .get()).
You can't pass plain multiprocessing Array or Lock objects to Pool methods, because they can't be pickled. You can use a Manager object instead.
Try this:
from multiprocessing import Pool, Lock, Array, Manager
import os, time

def long_time_task(i, array, lock):
    print('Run task %s (%s)...' % (i, os.getpid()))
    start = time.time()
    total_count = 0
    for k in range(5*10**7): total_count += 1
    total_count += i
    lock.acquire()
    array[i] = total_count
    lock.release()
    end = time.time()
    print('Task %s runs %0.2f seconds.' % (i, (end - start)))

def mainFunc():
    print('Parent process %s.' % os.getpid())
    p = Pool()
    m = Manager()
    array = m.Array('f', [0] * 20)
    lock = m.Lock()
    results = [p.apply_async(long_time_task, args=(i, array, lock)) for i in range(20)]
    [result.get() for result in results]
    print('Waiting for all subprocesses done...')
    p.close()
    p.join()
    print('All subprocesses done.')

if __name__ == '__main__':
    mainFunc()
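As a side note (my addition): the reason the original code appears to do nothing is that apply_async stores any worker exception inside the result object, and it only surfaces when you call .get() or pass an error_callback. For example, keeping everything else from the question unchanged:

# inside mainFunc() of the original code, replace the submission loop with:
results = [p.apply_async(long_time_task, args=(i, array, lock),
                         error_callback=print) for i in range(20)]

With the original Array and Lock this should print a RuntimeError explaining that such synchronized objects can only be shared between processes through inheritance.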
You could also simplify it like this but I'm not sure if this will work in your case:
import array
import os
import time
from multiprocessing import Pool

def long_time_task(i):
    print(f'Run task {i} ({os.getpid()})...')
    start = time.time()
    total_count = 0
    for k in range(5 * 10 ** 7):
        total_count += 1
    total_count += i
    end = time.time()
    print(f'Task {i} runs {end - start:.2f} seconds.')
    return total_count

def main():
    print('Parent process %s.' % os.getpid())
    a = array.array('d', range(20))
    r = range(20)
    with Pool() as pool:
        for idx, result in zip(r, pool.map(long_time_task, r)):
            a[idx] = result
    print(a)
    print(f'All subprocesses done.')

if __name__ == '__main__':
    main()

Python infinite loop append in multiprocessing.Manager list

Why isn't this code working:
from multiprocessing import Process, Manager
import pcap, string, sys

def f(a, p):
    try:
        while True:
            a.append(p.next())
    except KeyboardInterrupt:
        print 'stop'

def g(a):
    # print a
    while True:
        print a[len(a)-1]

if __name__ == '__main__':
    # num = Value('d', 0.0)
    manager = Manager()
    l = manager.list([])
    p = pcap.pcapObject()
    dev = sys.argv[1]
    p.open_live(dev, 65000, 0, 100)
    p.setfilter(string.join(sys.argv[2:], ' '), 0, 0)
    p = Process(target=f, args=(l, p))
    p.start()
    p.join()
    a = Process(target=g, args=(l,))
    a.start()
    a.join()
    # print l
    # print len(l)
while the code below works fine:
from multiprocessing import Process, Manager
import pcap, string, sys

def f(a, p):
    try:
        while len(a) < 100:
            a.append(p.next())
    except KeyboardInterrupt:
        print 'stop'

def g(a):
    # print a
    while True:
        print a[len(a)-1]

if __name__ == '__main__':
    # num = Value('d', 0.0)
    manager = Manager()
    l = manager.list([])
    p = pcap.pcapObject()
    dev = sys.argv[1]
    p.open_live(dev, 65000, 0, 100)
    p.setfilter(string.join(sys.argv[2:], ' '), 0, 0)
    p = Process(target=f, args=(l, p))
    p.start()
    p.join()
    a = Process(target=g, args=(l,))
    a.start()
    a.join()
    # print l
    # print len(l)
Other questions:
Is this the best and fastest/most optimized way to create shared memory between different processes?
Is the multiprocessing.Manager class only for finite-size data structures? If not, what am I doing wrong?
Any help/hint would be appreciated. Thanks in advance.
