Improving execution time for multiprocessing - python

I am trying to improve the performance of this code in terms of time, without success so far. Even running with 32 processes it takes around 5 minutes. Do you have any suggestions for speeding this code up? Here evaluated_f_bool_func_lst is a list with 2**24 elements; each element is a 1-character string, either '1' or '0' (a "binary list").
from sage.all import *
import time
from multiprocessing import Pool
import multiprocessing

def create_ext_component_function_i(dim, chunk_i, chunk_size, evaluated_f_bool_func_lst):
    sum_y_str = []
    for y in range(chunk_i, chunk_i + chunk_size):
        prod = ""
        for i in range(dim):
            minus1 = ((-1)**(1 & (y >> i)))
            prod += f'(1-{str(minus1)}*x[{str(i)}])*'
        sum_y_str.append(f'{prod}{evaluated_f_bool_func_lst[y]}')
    return "+".join(sum_y_str)

def create_ext_component_function(dim, evaluated_f_bool_func_lst):
    chunk_size = (2**dim) // 32  # integer division so range() gets an int step
    pool = Pool(32)
    results = []
    for i in range(0, 2**dim, chunk_size):
        results.append(pool.apply_async(create_ext_component_function_i,
                                        args=(dim, i, chunk_size, evaluated_f_bool_func_lst)))
    pool.close()
    pool.join()
    join_results = [result.get() for result in results]
    print("+".join(join_results))
    return 0

if __name__ == '__main__':
    evaluated_f_bool_func_lst = load("evaluated_f_bool_func_lst.obj")
    dim = 24
    create_ext_component_function(dim, evaluated_f_bool_func_lst)
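One thing worth checking, not mentioned in the post: every apply_async call pickles the whole 2**24-element list and ships it to a worker, which can dominate the runtime. A minimal sketch of one way to avoid that, assuming Linux's default fork start method so workers inherit module-level globals (the names _SHARED and build_expression are introduced here for illustration):
from multiprocessing import Pool

_SHARED = {}  # filled in the parent before the Pool is created

def worker_chunk(dim, chunk_i, chunk_size):
    # Read the big list from the inherited module-level global instead of
    # receiving it as a pickled argument with every task.
    lst = _SHARED['evaluated']
    parts = []
    for y in range(chunk_i, chunk_i + chunk_size):
        prod = "".join(f'(1-{(-1)**(1 & (y >> i))}*x[{i}])*' for i in range(dim))
        parts.append(f'{prod}{lst[y]}')
    return "+".join(parts)

def build_expression(dim, evaluated_f_bool_func_lst, nproc=32):
    _SHARED['evaluated'] = evaluated_f_bool_func_lst
    chunk_size = (2**dim) // nproc
    with Pool(nproc) as pool:
        chunks = pool.starmap(worker_chunk,
                              [(dim, i, chunk_size) for i in range(0, 2**dim, chunk_size)])
    return "+".join(chunks)
With spawn-based platforms (Windows, newer macOS) the global would not be inherited, so this sketch only applies on fork.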

Related

How to return a dictionary from a process in Python?

I want to make an inverted index using multiprocessing to speed up its work. My idea is to split the files into groups, and each process will build its own inverted index, and then I want to merge all these indexes into one inverted index. But I don't know how to return them to the main process that will merge them.
import multiprocessing as mp
from pathlib import Path
import re
import time

class InvertedIndex:
    def __init__(self):
        self.index = dict()

    def createIndex(self, path='data', threads_num=4):
        pathList = list(Path(path).glob('**/*.txt'))
        fileNum = len(pathList)
        oneProcessNum = fileNum / threads_num
        processes = []
        for i in range(threads_num):
            startIndex = int(i * oneProcessNum)
            endIndex = int((i + 1) * oneProcessNum)
            currLi = pathList[startIndex:endIndex]
            p = mp.Process(target=self.oneProcessTask, args=(currLi,))
            processes.append(p)
        [x.start() for x in processes]
        [x.join() for x in processes]

    @staticmethod
    def oneProcessTask(listOfDoc):
        # print(f'Start: {listOfDoc[0]}, end: {listOfDoc[-1]}') # temp
        tempDict = dict()
        for name in listOfDoc:
            with open(name) as f:
                text = f.read()
                li = re.findall(r'\b\w+\b', text)
                for w in li:
                    if tempDict.get(w) is None:
                        tempDict[w] = set()
                    tempDict[w].add(str(name))

    def getListOfDoc(self, keyWord):
        return self.index[keyWord]

if __name__ == '__main__':
    ii = InvertedIndex()
    start_time = time.time()
    ii.createIndex()
    print("--- %s seconds ---" % (time.time() - start_time))
I used multiprocessing.Manager to write everything into one dictionary, but that solution was too slow. So I went back to the idea of building a separate inverted index in each process and then merging them. But I don't know how to return all the indexes to one process.
Take a look at concurrent.futures (standard library) with either ThreadPoolExecutor or ProcessPoolExecutor. FYI: I wrote about that here and did not test this, but it is more or less the gist of what I use all the time.
from concurrent.futures import ThreadPoolExecutor, as_completed

def foo(stuff: int) -> dict:
    return {}

things_to_analyze = [1, 2, 3]
threads = []
results = []
with ThreadPoolExecutor() as executor:
    for thing in things_to_analyze:
        threads.append(executor.submit(foo, thing))
    for job in as_completed(threads):
        results.append(job.result())
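Applied to the inverted-index problem with the ProcessPoolExecutor variant, a rough sketch (untested here; buildPartialIndex and buildIndex are names made up for illustration, and groups is assumed to be a list of lists of file paths):
from concurrent.futures import ProcessPoolExecutor, as_completed
import re

def buildPartialIndex(paths):
    # Per-process task: build a {word: set(filenames)} dict for its files.
    partial = {}
    for name in paths:
        with open(name) as f:
            for w in re.findall(r'\b\w+\b', f.read()):
                partial.setdefault(w, set()).add(str(name))
    return partial

def buildIndex(groups):
    index = {}
    with ProcessPoolExecutor() as executor:
        futures = [executor.submit(buildPartialIndex, g) for g in groups]
        for job in as_completed(futures):
            # Merge each returned partial index into the final one.
            for w, docs in job.result().items():
                index.setdefault(w, set()).update(docs)
    return index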
I found a solution. I used pool.starmap to return a list of indexes.
My code:
import multiprocessing as mp
from pathlib import Path
import re

class InvertedIndex:
    def __init__(self):
        self.smallIndexes = None
        self.index = dict()

    def createIndex(self, path='data', threads_num=4):
        pathList = list(Path(path).glob('**/*.txt'))  # recursively walk all text files and make a list of them
        fileNum = len(pathList)
        oneProcessNum = fileNum / threads_num  # work out how many files one process should handle
        processes_args = []
        for i in range(threads_num):
            startIndex = int(i * oneProcessNum)
            endIndex = int((i + 1) * oneProcessNum)
            processes_args.append((path, startIndex, endIndex))
        pool = mp.Pool(threads_num)
        self.smallIndexes = pool.starmap(self.oneProcessTask, processes_args)
        self.mergeIndex()

    @staticmethod
    def oneProcessTask(path, startIndex, endIndex):
        pathList = list(Path(path).glob('**/*.txt'))
        listOfDoc = pathList[startIndex:endIndex]
        tempDict = dict()
        for name in listOfDoc:
            with open(name) as f:
                text = f.read()
                li = re.findall(r'\b\w+\b', text)
                for w in li:
                    if tempDict.get(w) is None:
                        tempDict[w] = set()
                    tempDict[w].add(str(name))
        return tempDict
Execution time decreased from 200 seconds (when I used shared memory and manager.dict) to 0.8 seconds (when I used pool.starmap).
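The post does not show mergeIndex; a minimal sketch of what it could look like, assuming self.smallIndexes holds the list of dicts returned by starmap:
    def mergeIndex(self):
        # Merge the per-process {word: set(filenames)} dicts into self.index.
        for small in self.smallIndexes:
            for word, docs in small.items():
                self.index.setdefault(word, set()).update(docs)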

Multiprocessing pool map doesn't accept several arguments as list of lists

How can I pass several arguments to pool.map correctly?
Currently I get an error saying:
File "C:/Users/maxime/Desktop/execom/di.py", line 42, in <module>
A = pool.map(energy2, args)
File "C:\Python36\lib\multiprocessing\pool.py", line 266, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "C:\Python36\lib\multiprocessing\pool.py", line 644, in get
raise self._value
TypeError: energy2() missing 2 required positional arguments: 'window' and 'i'
zip doesn't work either.
I also tried something with partial, but if all the arguments need to be different in each process, it is pointless.
import numpy as np
from numba import njit
import multiprocessing
from functools import partial
from itertools import repeat
import itertools

def energy2(signal, window, i):
    L2 = int(len(window) / 2)
    Lw = len(window)
    taille = len(signal)
    channel_buffer = np.zeros(len(signal))
    filtered_signalI = np.hstack((np.zeros(L2), signal, np.zeros(len(window))))
    for k in range(0, taille):
        buffer = (filtered_signalI[k : k + Lw] * window)
        channel_buffer[k] = np.sqrt(np.sum(buffer * buffer))
    return channel_buffer / Lw

if __name__ == "__main__":
    multiprocessing.freeze_support()
    # serial
    window = np.random.rand(32)
    N = 10
    Signals = np.zeros((N, 1000))
    for i in range(N):
        Signals[i, :] = np.random.rand(1000)
    Res = np.zeros(Signals.shape)
    for i in range(N):
        Res[i, :] = energy2(Signals[i, :], window, i)
    print(Res)
    # parallel
    cpu_nb = 11 # multiprocessing
    Res2 = np.zeros(Signals.shape)
    idx = 0
    pool = multiprocessing.Pool(cpu_nb)
    args = []
    for h in range(N):
        args.append([window, Signals[h, :], h])
    A = pool.map(energy2, args)
    for imap in range(len(A)):
        Res2[imap, :] = A[imap]
    print(Res - Res2) # find same results
I think the answer to this lies in starmap. See this for a previous solution. I'm not sure on the format of your inputs, but my guess is that you will need to zip them if you do indeed have a list of lists.
if __name__ == "__main__":
    multiprocessing.freeze_support()
    # serial
    window = np.random.rand(32)
    N = 10
    Signals = np.zeros((N, 1000))
    for i in range(N):
        Signals[i, :] = np.random.rand(1000)
    Res = np.zeros(Signals.shape)
    for i in range(N):
        Res[i, :] = energy2(Signals[i, :], window, i)
    print(Res)
    # parallel
    cpu_nb = 11 # multiprocessing
    Res2 = np.zeros(Signals.shape)
    idx = 0
    pool = multiprocessing.Pool(cpu_nb)
    args = []
    for h in range(N):
        args.append([Signals[h, :], window, h])  # order must match energy2(signal, window, i)
    A = pool.starmap(energy2, args)
    for imap in range(len(A)):
        Res2[imap, :] = A[imap]
    print(Res - Res2) # find same results
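If the per-call arguments live in separate sequences, the zip mentioned above could look like this (a sketch; it builds the (signal, window, i) tuples that starmap unpacks, reusing itertools.repeat from the question's imports):
from itertools import repeat

# One (signal, window, i) tuple per task, in the order energy2 expects.
args = list(zip((Signals[h, :] for h in range(N)), repeat(window), range(N)))
A = pool.starmap(energy2, args)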
That is a lot of code, but I think changing the energy2 function to:
def energy2(items):
    signal, window, i = items
    ...
would do the trick. The problem seems to be that you are passing a list to a function that expects 3 values. Normally you could handle this with a starred expression, but in your case it is a bit more delicate, since you are using map. So let the function handle its input.
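An equivalent way to do the same thing without touching energy2 itself is a small wrapper (a sketch; energy2_wrapper is a name introduced here for illustration):
def energy2_wrapper(items):
    # Unpack the single tuple that pool.map passes and forward it.
    signal, window, i = items
    return energy2(signal, window, i)

args = [(Signals[h, :], window, h) for h in range(N)]  # (signal, window, i) order
A = pool.map(energy2_wrapper, args)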

Python multiprocessing global numpy arrays

I have the following script:
import numpy as np
import multiprocessing

max_number = 100000
minimums = np.full((max_number), np.inf, dtype=np.float32)
data = np.zeros((max_number, 128, 128, 128), dtype=np.uint8)

def worker(start, end):
    for in_idx in range(start, end):
        value = data[start:end][in_idx] # compute something using this array
        minimums[in_idx] = value

def main():
    jobs = []
    num_jobs = 5
    for i in range(num_jobs):
        start = int(i * (1000 / num_jobs))
        end = int(start + (1000 / num_jobs))
        p = multiprocessing.Process(name=('worker_' + str(i)), target=worker, args=(start, end))
        jobs.append(p)
        p.start()
    for proc in jobs:
        proc.join()
    print(jobs)

if __name__ == '__main__':
    main()
How can I ensure that the numpy array is global and can be accessed by each worker? Each worker uses a different part of the numpy array.
import numpy as np
import multiprocessing as mp

ar = np.zeros((5, 5))

def callback_function(result):
    x, y, data = result
    ar[x, y] = data

def worker(num):
    data = ar[num, num] + 3
    return num, num, data

def apply_async_with_callback():
    pool = mp.Pool(processes=5)
    for i in range(5):
        pool.apply_async(worker, args=(i,), callback=callback_function)
    pool.close()
    pool.join()
    print("Multiprocessing done!")

if __name__ == '__main__':
    ar = np.ones((5, 5)) # This will be used, as local scope comes before global scope
    apply_async_with_callback()
Explanation: you set up your data array, your worker, and your callback function. The pool starts a number of independent worker processes, and each worker can handle more than one task. The callback runs in the main process and writes each result back into the array.
The __name__ == '__main__' guard protects the following lines from being run on each import.
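Not part of the original answer, but if the workers genuinely need to write into one shared array (rather than returning results to a callback), a minimal sketch using multiprocessing.Array is shown below, assuming the fork start method so the buffer created at module level is inherited by the pool workers:
import numpy as np
import multiprocessing as mp

# Shared, writable buffer; np.frombuffer gives a numpy view onto it.
shared = mp.Array('f', 5 * 5, lock=False)
ar = np.frombuffer(shared, dtype=np.float32).reshape(5, 5)

def worker(row):
    # Each worker writes into its own slice of the shared buffer.
    ar[row, :] = row

if __name__ == '__main__':
    with mp.Pool(processes=5) as pool:
        pool.map(worker, range(5))
    print(ar)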

How can I parallelize parsing in Python?

I have the following code, which converts a graph from an edge list to an adjacency matrix:
for line in open('graph.txt'):
    converted = [sparse_to_dense.get(int(ID)) for ID in line.split()]
    i = converted[0]
    j = converted[1]
    I.append(i)
    J.append(j)
n = max([max(I), max(J)]) + 1
data = [1] * len(I)
return coo_matrix((data, (I, J)), shape=(n, n), dtype='i1')
This code is awfully slow -- on my machine, converting 500k edges takes hours. On the other hand, I/O is obviously not the bottleneck (I can read the full file into memory almost instantaneously), so I think there is room for parallelism. But I'm not sure how to proceed: should I read the file in parallel, or something else?
Use multiprocessing; one way to do it is shown below. I did not check it, and it could be improved further:
import multiprocessing
import Queue  # Python 2; on Python 3 use "import queue"

class Worker(multiprocessing.Process):
    def __init__(self, queue, results):
        multiprocessing.Process.__init__(self)
        self.q = queue
        self.results = results

    def run(self):
        while True:
            try:
                lineno, linecontents = self.q.get(block=False)
            except Queue.Empty:
                break
            converted = [sparse_to_dense.get(int(ID)) for ID in linecontents.split()]
            i = converted[0]
            j = converted[1]
            self.results.put((i, j))

def main():
    q = multiprocessing.Queue()
    results = multiprocessing.JoinableQueue()
    for i, l in enumerate(open(fname)):
        q.put((i, l))
    workers = []
    for _ in xrange(4):
        w = Worker(q, results)
        w.start()
        workers.append(w)
    # NOTE: the draining loop below assumes the workers have already produced
    # their output; a more robust version would join the workers first
    # (or use a Pool, as sketched after this answer).
    I, J = [], []
    while True:
        try:
            i, j = results.get(block=False)
        except Queue.Empty:
            break
        I.append(i)
        J.append(j)
        results.task_done()
    results.join()
    n = max([max(I), max(J)]) + 1
    data = [1] * len(I)
    coo = coo_matrix((data, (I, J)), shape=(n, n), dtype='i1')
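A simpler alternative (a sketch, not part of the original answer) is to let Pool.map handle the queueing and chunking; parse_line and build_matrix are names introduced here, and sparse_to_dense is assumed to be a module-level dict visible to the workers (for example via fork):
from multiprocessing import Pool
from scipy.sparse import coo_matrix

def parse_line(line):
    # Convert one "src dst" edge line to a (row, col) pair.
    converted = [sparse_to_dense.get(int(ID)) for ID in line.split()]
    return converted[0], converted[1]

def build_matrix(fname, nproc=4):
    with open(fname) as f:
        lines = f.readlines()
    with Pool(nproc) as pool:
        pairs = pool.map(parse_line, lines, chunksize=10000)
    I, J = zip(*pairs)
    n = max(max(I), max(J)) + 1
    return coo_matrix(([1] * len(I), (I, J)), shape=(n, n), dtype='i1')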

How to let a multi-processing python application quit cleanly

When I run a Python script that uses multiprocessing, I find it hard to get it to stop cleanly when it receives Ctrl-C: Ctrl-C has to be pressed multiple times and all sorts of error messages appear on the screen.
How can you make a Python script that uses multiprocessing quit cleanly when it receives Ctrl-C?
Take this script for example:
import numpy as np, time
from multiprocessing import Pool

def countconvolve(N):
    np.random.seed() # ensure seed is random
    count = 0
    iters = 1000000 # 1 million
    l = 12
    k = 12
    l0 = l + k - 1
    for n in range(N):
        t = np.random.choice(np.array([-1, 1], dtype=np.int8), size=l0 * iters)
        v = np.random.choice(np.array([-1, 1], dtype=np.int8), size=l * iters)
        for i in xrange(iters):
            if (not np.convolve(v[(l*i):(l*(i+1))],
                                t[(l0*i):(l0*(i+1))], 'valid').any()):
                count += 1
    return count

if __name__ == '__main__':
    start = time.clock()
    num_processes = 8
    N = 13
    pool = Pool(processes=num_processes)
    res = pool.map(countconvolve, [N] * num_processes)
    print res, sum(res)
    print (time.clock() - start)
Jon's solution is probably better, but here it is using a signal handler. I tried it in a VBox VM, which was extremely slow, but it worked. I hope it helps.
import numpy as np, time
from multiprocessing import Pool
import signal

# define pool as global
pool = None

def term_signal_handler(signum, frame):
    global pool
    print 'CTRL-C pressed'
    try:
        pool.close()
        pool.join()
    except AttributeError:
        print 'Pool has been already closed'

def countconvolve(N):
    np.random.seed() # ensure seed is random
    count = 0
    iters = 1000000 # 1 million
    l = 12
    k = 12
    l0 = l + k - 1
    for n in range(N):
        t = np.random.choice(np.array([-1, 1], dtype=np.int8), size=l0 * iters)
        v = np.random.choice(np.array([-1, 1], dtype=np.int8), size=l * iters)
        for i in xrange(iters):
            if (not np.convolve(v[(l*i):(l*(i+1))], t[(l0*i):(l0*(i+1))], 'valid').any()):
                count += 1
    return count

if __name__ == '__main__':
    # Register the signal handler
    signal.signal(signal.SIGINT, term_signal_handler)
    start = time.clock()
    num_processes = 8
    N = 13
    pool = Pool(processes=num_processes)
    res = pool.map(countconvolve, [N] * num_processes)
    print res, sum(res)
    print (time.clock() - start)
I believe the try/except approach mentioned in a similar post here on SO could be adapted to cover it.
If you wrap the pool.map call in a try/except and then call terminate and join, I think that would do it.
[Edit]
Some experimentation suggests something along these lines works well:
from multiprocessing import Pool
import random
import time

def countconvolve(N):
    count = 0 # make sure count exists even if we are interrupted early
    try:
        sleepTime = random.randint(0, 5)
        time.sleep(sleepTime)
        count = sleepTime
    except KeyboardInterrupt as e:
        pass
    return count

if __name__ == '__main__':
    random.seed(0)
    start = time.clock()
    num_processes = 8
    N = 13
    pool = Pool(processes=num_processes)
    try:
        res = pool.map(countconvolve, [N] * num_processes)
        print res, sum(res)
        print (time.clock() - start)
    except KeyboardInterrupt as e:
        print 'Stopping..'
I simplified your example somewhat to avoid having to load numpy on my machine to test, but the critical part is the two try/except blocks which handle the Ctrl-C key presses.
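The example above catches KeyboardInterrupt but never actually calls terminate and join as suggested earlier; a minimal self-contained sketch of that pattern (work is a placeholder task introduced here):
from multiprocessing import Pool
import time

def work(n):
    # Placeholder task standing in for countconvolve.
    time.sleep(n)
    return n

if __name__ == '__main__':
    pool = Pool(4)
    try:
        print(pool.map(work, [1, 2, 3, 4]))
        pool.close()
        pool.join()
    except KeyboardInterrupt:
        print('Stopping..')
        pool.terminate() # stop the worker processes immediately
        pool.join()      # wait for them to exit before the script ends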
