I want to utilize multiple processors to calculate a function for two lists of values. In the test case below, my idea is that I have two lists: c = [1,2,3] and c_shift = [2,3,4]. I want to evaluate a function for a single value from each list and append the results to two separate solution arrays.
import numpy as np
import multiprocessing as mp

def function(x, a, b, c):
    return a*x**2 + b*x + c

def calculate(x, a, b, c):
    c_shift = c + 1
    result = []
    result_shift = []
    for i in range(len(c)):
        process0 = mp.Process(target=function, args=(x, a, b, c[i]))
        process1 = mp.Process(target=function, args=(x, a, b, c_shift[i]))
        process0.start()
        process1.start()
        process0.join()
        process1.join()
        # After it finishes, how do I append each list?
    return np.array(result), np.array(result_shift)

if __name__ == '__main__':
    x = np.linspace(-1, 1, 50)
    a = 1
    b = 1
    c = np.array([1, 2, 3])
    calculate(x, a, b, c)
When each process finishes and passes through join(), how do I append the output of process0 to result = [] and the output of process1 to result_shift = []?
The structure of the returned results should have the form:
result = [ [1 x 50], [1 x 50], [1 x 50] ]
result_shift = [ [1 x 50], [1 x 50], [1 x 50] ]
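Note that mp.Process discards the target function's return value, which is why result and result_shift never get filled above. A minimal sketch of one way to collect the two arrays, using mp.Pool.starmap and keeping the question's names (an illustration of the idea, not the only option):

import numpy as np
import multiprocessing as mp

def function(x, a, b, c):
    return a*x**2 + b*x + c

def calculate(x, a, b, c):
    c_shift = c + 1
    with mp.Pool() as pool:
        # starmap collects one return value per argument tuple
        result = pool.starmap(function, [(x, a, b, ci) for ci in c])
        result_shift = pool.starmap(function, [(x, a, b, ci) for ci in c_shift])
    return np.array(result), np.array(result_shift)

if __name__ == '__main__':
    x = np.linspace(-1, 1, 50)
    result, result_shift = calculate(x, 1, 1, np.array([1, 2, 3]))
    print(result.shape)        # (3, 50)
    print(result_shift.shape)  # (3, 50)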
Slightly different approach, but I think this is what you were looking to do:
import multiprocessing
import numpy as np
from functools import partial

def your_func(c, your_x, a, b):
    results = []
    for c_value in c:
        results.append(a * your_x ** 2 + b * your_x + c_value)
    return results

def get_results(c_values):
    your_x = np.linspace(-1, 1, 50)
    a = 1
    b = 1
    with multiprocessing.Pool() as pool:
        single_arg_function = partial(your_func, your_x=your_x, a=a, b=b)
        out = pool.map(single_arg_function, c_values)
    return out

if __name__ == "__main__":
    c_values = [np.array([1, 2, 3]), np.array([1, 2, 3]) + 1]
    out = get_results(c_values)
    result_one = out[0]
    result_two = out[1]
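With the names above, out has one entry per array in c_values, and each entry holds one result per c value; a quick sanity check on the shapes, assuming the snippet ran as-is:

print(len(out))             # 2: one entry per array in c_values
print(len(result_one))      # 3: one result per c value
print(result_one[0].shape)  # (50,)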
I'm not sure exactly what you're trying to accomplish with the shifted results, but in terms of concurrency, you should check out concurrent.futures to execute parallel tasks. Also, take a look at functools.partial to create a partial object - essentially a function with pre-filled args / kwargs. Here's an example:
import concurrent.futures
from functools import partial
import numpy as np

def map_processes(func, _iterable):
    with concurrent.futures.ProcessPoolExecutor() as executor:
        result = executor.map(func, _iterable)
    return result

def function(x, a, b, c):
    return a * x**2 + b * x + c

if __name__ == "__main__":
    base_func = partial(function, np.linspace(-1, 1, 50), 1, 1)
    print(list(map_processes(base_func, np.array([1, 2, 3]))))
Suppose I have the following:
import os
import numpy as np
from multiprocessing import Pool

a = [1, 2, 3, 4, 5, 6]

def func1(a):
    return a**2

def func2(a):
    x = np.zeros(1)
    for i in a:
        x += i
    return x

if __name__ == "__main__":
    pool = Pool(os.cpu_count())
    results = pool.map(func1, a)
    print(results)
and then I need
func2(results)
This is just a simple example of my problem. Please don't tell me to convert a to a numpy array first, because my func2 is far more complicated than this example.
Does anyone know how to do it, please?
Thank you very much.
Maybe you are looking for something in this direction:
import multiprocessing as mp
import numpy as np

def func1(n):
    return n ** 2

def func2(a):
    pool = mp.Pool(mp.cpu_count())
    results = pool.map(func1, a)
    pool.close()
    pool.join()
    print(results)
    # [1, 4, 9, 16, 25, 36]
    x = np.sum(results)
    return x

if __name__ == "__main__":
    a = [1, 2, 3, 4, 5, 6]
    out = func2(a)
    print(out)
    # 91
Is there a pythonic way to select N consecutive elements from a list or numpy array?
So suppose:
Choice = [1,2,3,4,5,6]
I would like to create a new list of length N by randomly selecting an index X into Choice and taking the N consecutive elements starting there, wrapping around to the beginning if necessary.
So if:
X = 4
N = 4
The resulting list would be:
Selection = [5,6,1,2]
I think something similar to the following would work:
S = []
for i in range(X, X+N):
    S.append(Choice[i % 6])
But I was wondering if there is a Python or numpy function that can select the elements at once, more efficiently.
Use itertools, specifically islice and cycle:
import random
from itertools import cycle, islice

start = random.randint(0, len(Choice) - 1)
Selection = list(islice(cycle(Choice), start, start + N))
cycle(Choice) is an infinite sequence that repeats your original list, so the slice start:start + N will wrap around if necessary.
You could use a list comprehension, using modulo operations on the index to keep it in range of the list:
Choice = [1,2,3,4,5,6]
X = 4
N = 4
L = len(Choice)
Selection = [Choice[i % L] for i in range(X, X+N)]
print(Selection)
Output
[5, 6, 1, 2]
Note that if N is less than or equal to len(Choice), you can greatly simplify the code:
Choice = [1,2,3,4,5,6]
X = 4
N = 4
L = len(Choice)
Selection = Choice[X:X+N] if X+N <= L else Choice[X:] + Choice[:X+N-L]
print(Selection)
Since you are asking for the most efficient way, I created a little benchmark to test the solutions proposed in this thread.
I rewrote your current solution as:
def op(choice, x):
    n = len(choice)
    selection = []
    for i in range(x, x + n):
        selection.append(choice[i % n])
    return selection
Where choice is the input list and x is the random index.
These are the results if choice contains 1_000_000 random numbers:
chepner: 0.10840400000000017 s
nick: 0.2066781999999998 s
op: 0.25887470000000024 s
fountainhead: 0.3679908000000003 s
Full code
import random
from itertools import cycle, islice
from time import perf_counter as pc

import numpy as np

def op(choice, x):
    n = len(choice)
    selection = []
    for i in range(x, x + n):
        selection.append(choice[i % n])
    return selection

def nick(choice, x):
    n = len(choice)
    return [choice[i % n] for i in range(x, x + n)]

def fountainhead(choice, x):
    n = len(choice)
    return np.take(choice, range(x, x + n), mode='wrap')

def chepner(choice, x):
    n = len(choice)
    return list(islice(cycle(choice), x, x + n))

results = []
n = 1_000_000
choice = random.sample(range(n), n)
x = random.randint(0, n - 1)

# Correctness
assert op(choice, x) == nick(choice, x) == chepner(choice, x) == list(fountainhead(choice, x))

# Benchmark
for f in op, nick, chepner, fountainhead:
    t0 = pc()
    f(choice, x)
    t1 = pc()
    results.append((t1 - t0, f))

for t, f in sorted(results):
    print(f'{f.__name__}: {t} s')
If using a numpy array as the source, we could of course use numpy "fancy indexing".
So, if ChoiceArray is the numpy array equivalent of the list Choice, and if L is len(Choice) or len(ChoiceArray):
Selection = ChoiceArray[np.arange(X, X + N) % L]
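A self-contained version of that idea, reusing the values from the question (ChoiceArray is simply the question's Choice as a numpy array):

import numpy as np

Choice = [1, 2, 3, 4, 5, 6]
X, N = 4, 4

ChoiceArray = np.array(Choice)
L = len(ChoiceArray)

# indices X..X+N-1, wrapped into range via modulo, then fancy-indexed
Selection = ChoiceArray[np.arange(X, X + N) % L]
print(Selection)  # [5 6 1 2]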
Here's a numpy approach:
import numpy as np
Selection = np.take(Choice, range(X, X + N), mode='wrap')
Works even if Choice is a Python list rather than a numpy array.
I have a performance issue when coding in Python.
Let's say I have a very large (N x 2) array of strings, say with N = 12,000,000, a matching costs array, and two variables label_a and label_b which are also strings. Here is the code:
import numpy as np
import time
indices = np.array([np.random.choice(np.arange(5000).astype(str), size=10000000),
                    np.random.choice(np.arange(5000).astype(str), size=10000000)]).T
costs = np.random.uniform(size=10000000)
label_a = '2'
label_b = '9'
t0 = time.time()
costs = costs[(indices[:,0]!=label_a)*(indices[:,0]!=label_b)*(indices[:,1]!=label_a)*(indices[:,1]!=label_b)]
indices = indices[(indices[:,0]!=label_a)*(indices[:,0]!=label_b)*(indices[:,1]!=label_a)*(indices[:,1]!=label_b)]
t1 = time.time()
toseq = t1-t0
print(toseq)
The above code segment takes 3 seconds every time it's run. I would like to achieve the same thing while reducing the computing cost:
I am using a boolean mask to retrieve only the rows in the costs and indices arrays where neither column equals label_a or label_b.
As indicated in the comments, computing the comparisons you're after only once, and combining them only once, saves time.
(I've also changed the way of timing, just for brevity - the results are the same)
import numpy as np
from timeit import timeit

r = 5000
n = 10000000
indices = np.array([
    np.random.choice(np.arange(r).astype(str), size=n),
    np.random.choice(np.arange(r).astype(str), size=n)
]).T

costs = np.random.uniform(size=n)

label_a = '2'
label_b = '9'

n_indices = np.array([
    np.random.choice(np.arange(r), size=n),
    np.random.choice(np.arange(r), size=n)
]).T

def run():
    global indices
    global costs
    _ = costs[(indices[:, 0] != label_a)*(indices[:, 0] != label_b) *
              (indices[:, 1] != label_a)*(indices[:, 1] != label_b)]
    _ = indices[(indices[:, 0] != label_a)*(indices[:, 0] != label_b) *
                (indices[:, 1] != label_a)*(indices[:, 1] != label_b)]

def run_faster():
    global indices
    global costs
    # compute the comparisons only once
    not_a0 = indices[:, 0] != label_a
    not_b0 = indices[:, 0] != label_b
    not_a1 = indices[:, 1] != label_a
    not_b1 = indices[:, 1] != label_b
    _ = costs[not_a0 * not_b0 * not_a1 * not_b1]
    _ = indices[not_a0 * not_b0 * not_a1 * not_b1]

def run_even_faster():
    global indices
    global costs
    # also combine them only once
    cond = ((indices[:, 0] != label_a) * (indices[:, 0] != label_b) *
            (indices[:, 1] != label_a) * (indices[:, 1] != label_b))
    _ = costs[cond]
    _ = indices[cond]

def run_sep_mask():
    global indices
    global costs
    global cond
    # just the masking part of run_even_faster
    cond = ((indices[:, 0] != label_a) * (indices[:, 0] != label_b) *
            (indices[:, 1] != label_a) * (indices[:, 1] != label_b))

def run_sep_index():
    global indices
    global costs
    global cond
    # just the indexing part of run_even_faster
    _ = costs[cond]
    _ = indices[cond]

def run_even_faster_numerical():
    global indices
    global costs
    # use int labels and n_indices instead of indices
    a = int(label_a)
    b = int(label_b)
    cond = ((n_indices[:, 0] != a) * (n_indices[:, 0] != b) *
            (n_indices[:, 1] != a) * (n_indices[:, 1] != b))
    _ = costs[cond]
    _ = indices[cond]

def run_all(funcs):
    for f in funcs:
        print('{:.4f} : {}()'.format(timeit(f, number=1), f.__name__))

run_all([run, run_faster, run_even_faster, run_sep_mask, run_sep_index,
         run_even_faster_numerical])
Note that I also added an example where the operation is not based on strings, but on numbers instead. If you can avoid the values being strings, but get numbers instead, you'd get a performance boost as well.
This boost gets substantial if you start comparing longer labels - in the end it might even be worth converting the strings to numbers before the filtering, if the strings get long enough.
These are my results:
0.9711 : run()
0.7065 : run_faster()
0.6983 : run_even_faster()
0.2657 : run_sep_mask()
0.4174 : run_sep_index()
0.4536 : run_even_faster_numerical()
The two sep entries show that the indexing takes roughly 1.5x as long as building the mask for run_even_faster, so you can only expect so much improvement from tuning the mask even more.
However, they also show that building the mask from integers adds less than 0.04 seconds on top of the actual indexing, compared to about 0.26 seconds for building the mask from strings. So that's the room you have for improvement.
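For completeness, a minimal sketch of that one-time string-to-integer conversion (assuming, as in the question, that the labels are numeric strings; the names follow the code above):

# convert the string array to integers once, then build the mask numerically
int_indices = indices.astype(int)
a, b = int(label_a), int(label_b)
cond = ((int_indices[:, 0] != a) * (int_indices[:, 0] != b) *
        (int_indices[:, 1] != a) * (int_indices[:, 1] != b))
costs = costs[cond]
indices = indices[cond]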
I have a multidimensional array (result) that should be filled by some nested loops. Function fun() is a complex and time-consuming function. I want to fill my array elements in a parallel manner, so I can use all my system's processing power.
Here's the code:
import numpy as np

def fun(x, y, z):
    # time-consuming computation...
    # ...
    return output

dim1 = 10
dim2 = 20
dim3 = 30

result = np.zeros([dim1, dim2, dim3])
for i in xrange(dim1):
    for j in xrange(dim2):
        for k in xrange(dim3):
            result[i, j, k] = fun(i, j, k)
My question is: can I parallelize this code, and if so, how?
I'm using Windows 10 64-bit and Python 2.7.
Please provide your solution by changing my code if you can.
Thanks!
If you want a more general solution, taking advantage of fully parallel execution, then why not use something like this:
>>> import multiprocess as mp
>>> p = mp.Pool()
>>>
>>> # a time consuming function taking x,y,z,...
>>> def fun(*args):
...     import time
...     time.sleep(.1)
...     return sum(*args)
...
>>> dim1, dim2, dim3 = 10, 20, 30
>>> import itertools
>>> input = ((i,j,k) for i,j,k in itertools.combinations_with_replacement(xrange(dim3), 3) if i < dim1 and j < dim2)
>>> results = p.map(fun, input)
>>> p.close()
>>> p.join()
>>>
>>> results[:2]
[0, 1]
>>> results[-2:]
[56, 57]
Note I'm using multiprocess instead of multiprocessing, but that's only to get the ability to work in the interpreter.
I didn't use a numpy.array, but if you had to... you could just dump the output from p.map directly into a numpy.array and then modify the shape attribute to be shape = (dim1, dim2, dim3), or you could do something like this:
>>> p = mp.Pool()  # fresh pool; the one above was closed
>>> input = ((i,j,k) for i,j,k in itertools.combinations_with_replacement(xrange(dim3), 3) if i < dim1 and j < dim2)
>>> import numpy as np
>>> results = np.empty(dim1*dim2*dim3)
>>> res = p.imap(fun, input)
>>> for i,r in enumerate(res):
...     results[i] = r
...
>>> results.shape = (dim1,dim2,dim3)
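One caveat about the generator above: itertools.combinations_with_replacement only yields triples with i <= j <= k, so it does not cover the full dim1 x dim2 x dim3 grid from the question. If you need every (i, j, k), itertools.product is the natural fit; a minimal sketch with a fresh pool (same fun and dims as above):

>>> import itertools
>>> p2 = mp.Pool()
>>> input = itertools.product(xrange(dim1), xrange(dim2), xrange(dim3))
>>> results = np.array(p2.map(fun, input)).reshape(dim1, dim2, dim3)
>>> results.shape
(10, 20, 30)
>>> p2.close()
>>> p2.join()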
Here is a version of the code that runs fun(i, j, k) in parallel for different k indices. This is done by running fun in different processes, using https://docs.python.org/2/library/multiprocessing.html
import numpy as np
from multiprocessing import Pool

def fun(x, y, z):
    # time-consuming computation...
    # ...
    return output

def fun_wrapper(indices):
    return fun(*indices)

if __name__ == '__main__':
    dim1 = 10
    dim2 = 20
    dim3 = 30

    result = np.zeros([dim1, dim2, dim3])

    pool = Pool(processes=8)
    for i in xrange(dim1):
        for j in xrange(dim2):
            result[i, j] = pool.map(fun_wrapper, [(i, j, k) for k in xrange(dim3)])
This is not the most elegant solution, but you may start with it. And you will only get a speed-up if fun contains time-consuming computation.
A simple approach could be to divide the array into sections and create some threads to operate on those sections. For example, one section from (0,0,0) to (5,10,15) and another from (5,10,16) to (10,20,30).
You can use the threading module and do something like this:
import numpy as np
import threading as t

def fun(x, y, z):
    # time-consuming computation...
    # ...
    return output

dim1 = 10
dim2 = 20
dim3 = 30

result = np.zeros([dim1, dim2, dim3])

# b - beginning index, e - end index
def work(ib, jb, kb, ie, je, ke):
    for i in xrange(ib, ie):
        for j in xrange(jb, je):
            for k in xrange(kb, ke):
                result[i, j, k] = fun(i, j, k)

threads = list()
threads.append(t.Thread(target=work, args=(0, 0, 0, dim1/2, dim2/2, dim3/2)))
threads.append(t.Thread(target=work, args=(dim1/2, dim2/2, dim3/2 + 1, dim1, dim2, dim3)))
for thread in threads:
    thread.start()
for thread in threads:
    thread.join()
You can define these sections through some algorithm and determine the number of threads dynamically. Keep in mind that in CPython the GIL prevents threads from running Python bytecode in parallel, so this only helps if fun spends its time in code that releases the GIL (e.g. NumPy operations or I/O); otherwise, use processes. Hope it helps you, or at least gives you some ideas.
Say I have a function that calculates and returns a value:
h = 4.13566733*10**-15
c = 2.99792458*10**(8+9)

def func(w):
    for i in w:
        A = h*c/i
    return A

w = [1]
print func(w)
Fine. But if w is a larger array, say:
w = [1 ,2, 3, 4]
func then only returns the result for the last value in w (4). Understandably, as that is the last item in the for loop.
But how can I make func return an array with all 4 values, something like this?
[1239, 619, 413, 309]
Python supports passing multiple arguments and returning multiple values. With that in mind, you can change your code to:
def func(w):
    return [h*c/i for i in w]
If you now call this, you can get the required array:
>>> w = [1 ,2, 3, 4]
>>> func(w)
[1239.8418743309974, 619.9209371654987, 413.2806247769991, 309.96046858274934]
As for calling with multiple arguments and returning multiple values, consider the following example, which takes 2 inputs and returns 3 outputs:
>>> def get_product_modulo_dividend(x, y):
...     return x*y, x%y, x/y
...
>>> get_product_modulo_dividend(100, 10)
(1000, 0, 10)
Make your A a list and append each result to it:
h = 4.13566733*10**-15
c = 2.99792458*10**(8+9)

def func(w):
    A = []
    for i in w:
        A.append(h*c/i)
    return A

w = [1,2,3,4]
print func(w)
This outputs:
[1239.8418743309974, 619.92093716549869, 413.2806247769991, 309.96046858274934]
This is similar to what @mu posted, but it seems like your function operates on single, unconnected values, so it might be more flexible to implement it as taking just a number as a parameter:
h = 4.13566733*10**-15
c = 2.99792458*10**(8+9)

def func(x):
    return h*c / x

w = [1,2,3,4]
print([func(x) for x in w])
You can use map, for example.
h = 4.13566733*10**-15
c = 2.99792458*10**(8+9)

def func(x):
    return h*c/x

w = [1,2,3]
print map(func, w)
This will return [1239.8418743309974, 619.9209371654987, 413.2806247769991]. (Note that in Python 3, map returns an iterator, so wrap it in list().)
And you can use a more elegant way (at least to me):
h = 4.13566733*10**-15
c = 2.99792458*10**(8+9)

w = [1,2,3]
result = map(lambda x: h*c/x, w)
This also returns [1239.8418743309974, 619.9209371654987, 413.2806247769991].