When I raise an exception inside my thread_function, it doesn't stop the rest of the map processing. I'd like to stop it.
def thread_function(n):
    """Worker used to demonstrate the problem: fail once n reaches 10."""
    if n == 10:
        raise Exception('Stop everything!')


# As described in the question, the exception raised for n == 10 does not
# stop the remaining items of the map from being processed.
pool = Pool(processes=4)
pool.map(thread_function, range(1, 1000), chunksize=1)
I'd expect no more processing after one thread reached n == 10.
I don't know of a way to do this directly with map, but you can monitor a map_async call like this:
from multiprocessing import Pool
import time
def thread_function(n):
    """Print n and sleep briefly; raise when n == 10 to simulate a failure."""
    if n == 10:
        print('Raising Exception')
        raise Exception('Stop everything!')
    print(n)
    time.sleep(0.1)


# The guard keeps worker processes from re-running the pool setup when the
# module is imported by a child (required with the "spawn" start method).
if __name__ == '__main__':
    pool = Pool(processes=4)
    result = pool.map_async(thread_function, range(1, 1000), chunksize=1)
    while not result.ready():
        # NOTE(review): `_success` is a private attribute of the async result
        # object, not public API -- it may change between Python versions.
        if not result._success:
            print('Exiting for failure')
            pool.terminate()
            pool.join()
            break
        # Yield the CPU between polls instead of spinning at 100 %.
        time.sleep(0.01)
Related
I'm trying to run this program but it is showing me "Thread 0 has 0 prime numbers" in the console followed by "Killed" after 5 minutes. Moreover, it is very slow. Please help me develop and correct this code.
import time
# Number of threads the script creates.
Nthreads=4
# Upper bound of the range searched for primes.
maxNumber=3000000
starting_range=0
ending_range=0
# NOTE(review): this stays 0 for the whole run -- the loop further down assigns
# to the misspelled name `devision` but reads `division`.
division=0
lst=[]
def prime(x, y):
    """Return the primes in the half-open range [x, y) by trial division.

    Every candidate is tested against all divisors up to half its value,
    so the running time grows quadratically with the size of the numbers.
    """
    prime_list = []
    for i in range(x, y):
        # Nothing below 2 is prime (this also rejects negative candidates).
        if i < 2:
            continue
        for j in range(2, i // 2 + 1):
            if i % j == 0:
                break
        else:
            # The inner loop finished without finding a divisor.
            prime_list.append(i)
    return prime_list
def func_thread(x, y):
    """Thread target: compute the primes in [x, y) and append them to `out`."""
    # NOTE(review): `out` is never defined in this snippet, so this raises
    # NameError as soon as a thread runs.
    out.append(prime(x, y))


thread_list = []
results = len(lst)
for i in range(Nthreads):
    devision=maxNumber//Nthreads
    # NOTE(review): this reads `division` (always 0), not `devision`, so every
    # range starts at 1; for i == 0 the range is (1, 0), i.e. empty.
    starting_range = (i-1)*division+1
    ending_range = i*devision
    # NOTE(review): the primes are computed right here in the main thread,
    # before any worker thread has been started.
    lst = prime(starting_range, ending_range)
    print(" Thread ", i, " has ", len(lst), " prime numbers." )
    # NOTE(review): `threading` is not imported in this snippet, and the thread
    # receives (i, results) instead of (starting_range, ending_range).
    thread = threading.Thread(target=func_thread, args=(i, results))
    thread_list.append(thread)

for thread in thread_list:
    thread.start()
for thread in thread_list:
    thread.join()
In Python, if you use multithreading for CPU-bound tasks, it will be slower than if you don't use multithreading. You need to use multiprocessing for this problem. You can read this article for more information: https://www.geeksforgeeks.org/difference-between-multithreading-vs-multiprocessing-in-python/
Multithreading is wholly inappropriate for CPU-intensive work such as this. However, it can be done:
from concurrent.futures import ThreadPoolExecutor
# Number of equal-sized sub-ranges the search interval is split into.
NTHREADS = 4
# Exclusive upper bound of the numbers tested for primality.
MAXNUMBER = 3_000_000
# Width of each sub-range.
CHUNK = MAXNUMBER // NTHREADS
# The chunking below only tiles [0, MAXNUMBER) exactly when this holds.
assert MAXNUMBER % NTHREADS == 0
# Half-open (lo, hi) pairs, one per chunk.
RANGES = [(base, base+CHUNK) for base in range(0, MAXNUMBER, CHUNK)]
# Shared result list that every worker appends to; the order is not sorted.
all_primes = []
def isprime(n):
    """Return True if the integer *n* is prime, False otherwise.

    After ruling out multiples of 2 and 3, only divisors of the form
    6k - 1 and 6k + 1 up to the square root of n are tried.
    """
    if n <= 3:
        # 2 and 3 are prime; 1, 0 and negatives are not.
        return n > 1
    if not n % 2 or not n % 3:
        return False
    for i in range(5, int(n**0.5) + 1, 6):
        # i is 6k - 1 and i + 2 is 6k + 1.
        if not n % i or not n % (i + 2):
            return False
    return True
def process(_range):
    """Append every prime in the half-open range *_range* to `all_primes`.

    *_range* is a (lo, hi) pair. Only odd candidates are tested; the prime 2
    is added explicitly by whichever chunk starts below 3.
    """
    lo, hi = _range
    if lo < 3:
        all_primes.append(2)
        lo = 3
    elif lo % 2 == 0:
        # Start on the first odd number of the chunk.
        lo += 1
    for p in range(lo, hi, 2):
        if isprime(p):
            all_primes.append(p)


# One worker per chunk, matching the NTHREADS constant used to build RANGES.
with ThreadPoolExecutor(max_workers=NTHREADS) as executor:
    # Consume the iterator so that an exception raised inside a worker is
    # re-raised here instead of being silently dropped.
    list(executor.map(process, RANGES))
The all_primes list is unordered. Note also that this strategy will only work if MAXNUMBER is exactly divisible by NTHREADS.
Note on performance:
This takes 7.88s on my machine. A multiprocessing variant takes 2.90s
This question already has answers here:
Is there any way to kill a Thread?
(31 answers)
Closed 12 months ago.
I am looking for a way to convert this kind of code from multiprocessing to multithreading:
import multiprocessing
import random
import time
FIND = 50
MAX_COUNT = 100000
INTERVAL = range(10)

# Capacity of one: once a result is waiting in the queue, further put() calls
# block until it has been taken out.
queue = multiprocessing.Queue(maxsize=1)


def find(process, initial):
    """Walk upward from *initial* in random steps until the walk hits FIND.

    Each time the walk lands exactly on FIND a message is put on `queue`.
    `succ` is never set to True, so the function only stops when the
    process running it is terminated from outside.
    """
    succ = False
    while not succ:
        start = initial
        while start <= MAX_COUNT:
            if FIND == start:
                queue.put(f"Found: {process}, start: {initial}")
                break
            i = random.choice(INTERVAL)
            start = start + i
            print(process, start)


processes = []
# NOTE(review): the manager is created but never used in this snippet.
manager = multiprocessing.Manager()
for i in range(5):
    process = multiprocessing.Process(target=find, args=(f'computer_{i}', i))
    processes.append(process)
    process.start()

# Block until the first process reports a hit, then stop all of them.
ret = queue.get()
for i in range(5):
    process = processes[i]
    process.terminate()
    print(f'terminated {i}')
print(ret)
The way it works is it starts multiple processes and after the first process finished the function find isn't needed anymore. I tried to transform it in that way, but unfortunately the terminate function is not usable:
import _thread as thread
import queue
import random
import time
FIND = 50
MAX_COUNT = 100000
INTERVAL = range(10)

# Capacity of one: once a result is waiting in the queue, further put() calls
# block until it has been taken out.
qu = queue.Queue(maxsize=1)


def find(process, initial):
    """Walk upward from *initial* in random steps until the walk hits FIND.

    Each time the walk lands exactly on FIND a message is put on `qu`.
    `succ` is never set to True, so the loop never ends on its own.
    """
    succ = False
    while not succ:
        start = initial
        while start <= MAX_COUNT:
            if FIND == start:
                qu.put(f"Found: {process}, start: {initial}")
                break
            i = random.choice(INTERVAL)
            start = start + i
            print(process, start)


threads = []
for i in range(5):
    th = thread.start_new_thread(find, (f'computer_{i}', i))
    threads.append(th)

# Block until the first thread reports a hit.
ret = qu.get()
for i in range(5):
    th = threads[i]
    # NOTE(review): start_new_thread() returns a plain integer identifier, so
    # this call fails -- it is the problem the question asks about.
    th.terminate()
    print(f'terminated {i}')
print(ret)
How can I get some termination of threads?
Try:
import ctypes
import threading

# Ask the interpreter to raise SystemExit inside every thread except the main
# one. The exception is delivered asynchronously: a thread only sees it the
# next time it executes Python bytecode.
# NOTE(review): only threads known to the `threading` module are listed by
# enumerate(); confirm that the threads to be stopped were started through it.
for worker in threading.enumerate():
    if worker is threading.main_thread():
        continue
    # The C function takes the thread id as an unsigned long (Python 3.7+).
    ctypes.pythonapi.PyThreadState_SetAsyncExc(
        ctypes.c_ulong(worker.ident), ctypes.py_object(SystemExit)
    )
I am trying to use ProcessPoolExecutor() to run some functions, but I can't work out how to get the functions' return values out of the with block.
def threaded_upload(i):
    """Sleep two seconds, then return a one-element list derived from *i*."""
    time.sleep(2)
    if i == 0:
        k = 10
    elif i == 2:
        k = i*i
    else:
        k = -99
    return [k]


def controller():
    """Submit four jobs to a process pool and return their merged results."""
    # NOTE(review): when this module is imported instead of run as a script
    # the `if` body is skipped and the function falls through, returning None.
    if __name__ == "__main__":
        futures = []
        with ProcessPoolExecutor() as pool:
            for paso in range(4):
                futuro_i = pool.submit(threaded_upload,paso)
                # NOTE(review): calling result() here blocks until this job
                # has finished before the next one is submitted.
                wth=[futuro_i.result()]
                futures.append(futuro_i)
        wait(futures, return_when=ALL_COMPLETED)
        merged_list = []
        for future in futures:
            for valor in future.result():
                merged_list.append(valor)
        Lista_Final = merged_list
        wait(futures, return_when=ALL_COMPLETED)
        return Lista_Final


print(controller())
The output of the code is:
None
[10, -99, 4, -99]
I am not sure why?
The "wait" doesn't seem to wait until all functions are executed either.
To be honest, I have been reading and reading for a few days, but the documentation for concurrent.futures and multiprocessing is more advanced than my current knowledge.
Any clarification will be appreciated.
Thanks in advance.
You first submit the jobs and then wait for the results. You can also return an integer instead of a list and then skip the inner loop:
test.py:
import random
import time
from concurrent.futures import ProcessPoolExecutor, wait
def worker(i):
    """Sleep for a random 1-5 seconds, then return a value derived from *i*."""
    t = random.uniform(1, 5)
    print(f"START: {i} ({t:.3f}s)")
    time.sleep(t)
    if i == 0:
        k = 10
    elif i == 2:
        k = i * i
    else:
        k = -99
    print(f"END: {i}")
    return k


def main():
    """Run four workers in a process pool and print their results in order."""
    with ProcessPoolExecutor() as pool:
        # Submit everything first, and only then wait.
        futures = [pool.submit(worker, i) for i in range(4)]
        wait(futures)  # ALL_COMPLETED is the default value
    # Read the results from `futures`, not from the set returned by wait():
    # a set has no defined order, while this list keeps submission order.
    results = [future.result() for future in futures]
    print(results)


if __name__ == "__main__":
    main()
Test:
$ python test.py
START: 0 (1.608s)
START: 1 (1.718s)
START: 2 (1.545s)
START: 3 (1.588s)
END: 2
END: 3
END: 0
END: 1
[10, -99, 4, -99]
I'm testing the code below to calculate an array in parallel, but it seems that the long-running function is never executed. My Python version: 3.7.4; operating system: Windows 10.
from multiprocessing import Pool, Lock, Array
import os, time
def long_time_task(i,array,lock):
    """Count to 5*10**7, add *i*, and store the total in array[i] under *lock*."""
    print('Run task %s (%s)...' % (i, os.getpid()))
    start = time.time()
    total_count = 0
    for k in range(5*10**7):
        total_count += 1
    total_count += i
    lock.acquire()
    array[i] = total_count
    lock.release()
    end = time.time()
    print('Task %s runs %0.2f seconds.' % (i, (end - start)))


def mainFunc():
    """Schedule 20 tasks on a pool and wait for the pool to shut down."""
    print('Parent process %s.' % os.getpid())
    p = Pool()
    array = Array('f', 20)
    lock = Lock()
    # NOTE(review): the objects returned by apply_async are discarded, so an
    # error raised while dispatching or running a task is never reported.
    for i in range(20):
        p.apply_async(long_time_task, args=(i,array,lock))
    print('Waiting for all subprocesses done...')
    p.close()
    p.join()
    print('All subprocesses done.')


if __name__ == '__main__':
    mainFunc()
There are a few issues with your code:
The apply_async method returns a result object that you need to wait on (for example, by calling its get() method).
You can't pass normal multiprocessing Array or Lock objects to Pool methods, because they can't be pickled. You can use manager object instead.
Try this:
from multiprocessing import Pool, Lock, Array, Manager
import os, time
def long_time_task(i,array,lock):
    """Count to 5*10**7, add *i*, and store the total in array[i] under *lock*."""
    print('Run task %s (%s)...' % (i, os.getpid()))
    start = time.time()
    total_count = 0
    for k in range(5*10**7):
        total_count += 1
    total_count += i
    # The context manager releases the lock even if the assignment fails.
    with lock:
        array[i] = total_count
    end = time.time()
    print('Task %s runs %0.2f seconds.' % (i, (end - start)))


def mainFunc():
    """Run 20 tasks on a pool, sharing the array and lock through a Manager."""
    print('Parent process %s.' % os.getpid())
    p = Pool()
    m = Manager()
    # Double precision ('d'): a single-precision 'f' slot cannot represent
    # totals around 5*10**7 exactly, so the "+ i" part would be rounded away.
    array = m.Array('d', [0] * 20)
    lock = m.Lock()
    results = [p.apply_async(long_time_task, args=(i,array,lock)) for i in range(20)]
    print('Waiting for all subprocesses done...')
    # get() blocks until the task has finished and re-raises its exception.
    for result in results:
        result.get()
    p.close()
    p.join()
    print('All subprocesses done.')


if __name__ == '__main__':
    mainFunc()
You could also simplify it like this but I'm not sure if this will work in your case:
import array
import os
import time
from multiprocessing import Pool
def long_time_task(i):
    """Count to 5 * 10 ** 7, add *i*, and return the total."""
    print(f'Run task {i} ({os.getpid()})...')
    start = time.time()
    total_count = 0
    for k in range(5 * 10 ** 7):
        total_count += 1
    total_count += i
    end = time.time()
    print(f'Task {i} runs {end - start:.2f} seconds.')
    return total_count


def main():
    """Compute 20 totals in a process pool and print them as an array."""
    print(f'Parent process {os.getpid()}.')
    r = range(20)
    with Pool() as pool:
        # map() returns the results in input order, so they can be stored
        # directly without writing them back index by index.
        a = array.array('d', pool.map(long_time_task, r))
    print(a)
    print('All subprocesses done.')


if __name__ == '__main__':
    main()
I need some help with the code below. There has to be something wrong with it, because I get better results with the sequential sort than with the parallel version. I'm new to Python, and especially to parallel programming, so any help would be welcome.
import random, time
from multiprocessing import Process, Pipe,cpu_count
from copy import deepcopy
def main():
    """Time a sequential and a process-based quicksort on the same data."""
    create_list = [random.randint(1,1000) for x in range(25000)]

    # Sequential sort
    sequentialsortlist = deepcopy(create_list)
    start = time.time()
    sorted2 = quicksort(sequentialsortlist)
    elapsed = time.time() - start
    print("sequential sort")
    print(elapsed)

    time.sleep(4)

    # Parallel quicksort
    parallelsortlist = deepcopy(create_list)
    start = time.time()
    n = cpu_count()
    pconn, cconn = Pipe()
    p = Process(target=quicksortParallel,
                args=(parallelsortlist, cconn, n,))
    p.start()
    # The sorted list comes back through the pipe.
    lyst = pconn.recv()
    p.join()
    elapsed = time.time() - start
    print("Parallels sort")
    print(elapsed)
def quicksort(lyst):
    """Return the items of *lyst* in ascending order.

    The first element is used as the pivot. Lists with fewer than two
    items are returned unchanged (the same object); longer inputs produce
    a new list and are not modified.
    """
    if len(lyst) <= 1:
        return lyst

    pivot = lyst[0]
    less = []
    pivotList = []
    more = []
    # Three-way partition: smaller than, equal to and larger than the pivot.
    for i in lyst:
        if i < pivot:
            less.append(i)
        elif i > pivot:
            more.append(i)
        else:
            pivotList.append(i)
    return quicksort(less) + pivotList + quicksort(more)
def quicksortParallel(lyst, conn, procNum):
    """Sort *lyst* and send the sorted list through the pipe end *conn*.

    While *procNum* is positive and the list has more than one item, the
    list is partitioned around its first element and each side is sorted in
    a newly started process; otherwise the sequential quicksort is used.
    """
    if procNum <= 0 or len(lyst) <= 1:
        conn.send(quicksort(lyst))
        conn.close()
        return

    pivot = lyst[0]
    less = []
    pivotList = []
    more = []
    for i in lyst:
        if i < pivot:
            less.append(i)
        elif i > pivot:
            more.append(i)
        else:
            pivotList.append(i)

    # NOTE(review): every level starts two more processes, so a call with
    # procNum = cpu_count() can fan out to as many as 2**procNum leaves.
    pconnLeft, cconnLeft = Pipe()
    leftProc = Process(target=quicksortParallel,
                       args=(less, cconnLeft, procNum - 1))
    pconnRight, cconnRight = Pipe()
    rightProc = Process(target=quicksortParallel,
                        args=(more, cconnRight, procNum - 1))
    leftProc.start()
    rightProc.start()

    # Receive both halves before joining the children.
    conn.send(pconnLeft.recv() + pivotList + pconnRight.recv())
    conn.close()
    leftProc.join()
    rightProc.join()


if __name__ == '__main__':
    main()
The simple answer is that the overhead of setting up your parallel execution environment and then re-joining it at the end is more expensive than the performance increase gained from the parallelism.
Multi-processing actually forks sub-processes. That's very expensive. It only makes sense to do this if the amount of work done in each thread is very large.
This kind of problem is actually pretty common when people naively try to parallelize code. For many 'reasonable' workloads, the single-threaded implementation often winds up being faster.
There is a cost associated with starting/terminating a process. Interprocess communication is not free, either. So the overhead is just too big.