python appending does not work while using multiprocessing - python

So the problem is that appending "e" to my list "ok = []" does not have an effect, but that is weird considering that, when doing print(e) just one line above ok.append(e), the value of "e" is printed out as it should.
no need to understand the program and what it does, the main issure here is just that appending some value to my list does not have effect, even though the value is real.
I tried to use ok = [] inside of if __name__=='__main__': however that gave me the error NameError: name 'ok' is not defined so i then tried to use "global ok" inside of "some_function" however that gave me the same results
import time
import multiprocessing as mp
ratios1 = [1/x for x in range(1,11)]
ratios2 = [y/1 for y in range(1,11)]
x = 283
y = 436
ok = []
def some_function(x_, y_):
list_ = [[a, b] for a in range(1, 1980 + 1) for b in range(1, 1980 + 1) if a / b == x_ / y_]
for e in list_:
if not e[0] in [h[0] for h in ok]:
if not e[1] in [u[1] for u in ok]:
print(e)
ok.append(e)
if __name__=='__main__':
processes = []
if x / y in ratios1 or x / y in ratios2:
some_function(x_=x, y_=y)
else:
for X_, Y_ in [
[x, y],
[x - 1, y], [x, y - 1], [x + 1, y], [x, y + 1],
[x - 2, y], [x, y - 2], [x + 2, y], [x, y + 2],
[x - 3, y], [x, y - 3], [x + 3, y], [x, y + 3]
]:
p = mp.Process(target=some_function, args=(X_,Y_))
processes.append(p)
start = time.time()
for p_ in processes:
p_.start()
for p_ in processes:
p_.join()
end = time.time()
print(f"finished in {end - start} sec")
print(ok)
when running this is output:
[...] # other values of "e"
[283, 433] # some random "e" value
[566, 866] # some random "e" value
[849, 1299] # some random "e" value
[1132, 1732] # some random "e" value
finished in 0.8476874828338623 sec # execution time
[] # the "ok" list being printed out at the end
after adding print(id(ok)) both in "some_function" and in the end, it gives me the following output:
OBS: I removed print(e) for this output
2489040444480
3014871358528
2324227431488
2471301880896
1803966487616
2531583073344
1665411652672
2149818113088
2330038901824
1283883998272
2498472320064
2147028311104
2509405887552
finished in 0.8341867923736572 sec
2589544128640
[]

you need a list that can be accessed from more than one process, which is made by using a multiprocessing.Manager.list, and you have to pass it as an argument, you cannot have it as a global, as inheriting globals is OS sepecific.
using a managed list is slower than a normal list, so if you find the performance unacceptable you should really try to work with only local variables and forget about using globals, as IPC is an expensive process.
import time
import multiprocessing as mp
ratios1 = [1/x for x in range(1,11)]
ratios2 = [y/1 for y in range(1,11)]
x = 283
y = 436
def some_function(x_, y_, ok_list):
list_ = [[a, b] for a in range(1, 1980 + 1) for b in range(1, 1980 + 1) if a / b == x_ / y_]
for e in list_:
if not e[0] in [h[0] for h in ok_list]:
if not e[1] in [u[1] for u in ok_list]:
print(e)
ok_list.append(e)
if __name__=='__main__':
manager = mp.Manager()
ok_list = manager.list()
processes = []
if x / y in ratios1 or x / y in ratios2:
some_function(x_=x, y_=y)
else:
for X_, Y_ in [
[x, y],
[x - 1, y], [x, y - 1], [x + 1, y], [x, y + 1],
[x - 2, y], [x, y - 2], [x + 2, y], [x, y + 2],
[x - 3, y], [x, y - 3], [x + 3, y], [x, y + 3]
]:
p = mp.Process(target=some_function, args=(X_,Y_,ok_list))
processes.append(p)
start = time.time()
for p_ in processes:
p_.start()
for p_ in processes:
p_.join()
end = time.time()
print(f"finished in {end - start} sec")
print(ok_list)

This should work, the problem was that when you start the process the objects it uses are not really passed to it as much as they are cloned. Using muliprocessing.Pool.starmap allows us to return values from the process which circumvents this issue.
We use starmap and not just map, so that we can pass multiple parameters to some_function. Additionally the Pool lets you replace the for X_,Y_ in ... loop and run it multiprocessed.
import time
import multiprocessing as mp
from multiprocessing import Pool
ratios1 = [1/x for x in range(1,11)]
ratios2 = [y/1 for y in range(1,11)]
x = 283
y = 436
ok = []
def some_function(x_, y_):
list_ = [[a, b] for a in range(1, 1980 + 1) for b in range(1, 1980 + 1) if a / b == x_ / y_]
for e in list_:
if not e[0] in [h[0] for h in ok]:
if not e[1] in [u[1] for u in ok]:
print(e)
ok.append(e)
return ok
if __name__=='__main__':
processes = []
res=[]
if x / y in ratios1 or x / y in ratios2:
some_function(x_=x, y_=y)
else:
start = time.time()
with Pool(13) as p:
res = p.starmap(some_function, [[x, y],
[x - 1, y], [x, y - 1], [x + 1, y], [x, y + 1],
[x - 2, y], [x, y - 2], [x + 2, y], [x, y + 2],
[x - 3, y], [x, y - 3], [x + 3, y], [x, y + 3]])
ok = res
end = time.time()
print(f"finished in {end - start} sec")
print(ok)

Related

Plotting a function of two variables based on data from a file

Suppose the values of a function are written line by line in the file in the form x y f(x, y) and I read this file into the list of lists [ [x1, y1, f(x1, y1)], ..., [xN, yN, f(xN, yN)] ]:
with open('data.txt', 'r') as file:
data = [[float(x) for x in line.split()] for line in file]
The question is how to plot this function.
I managed to write a program (see below) that implements the task (the values for data are taken as an example), but it looks too complicated and, it seems to me, there is a more elegant solution.
import numpy as np
import matplotlib.pyplot as plt
data = [[0, 0, 1], [0, 1, 1], [1, 0, 2], [1, 1, 3]]
x, y, _ = np.array(data).T.tolist()
x = list(set(x))
y = list(set(y))
def f(x, y):
for val in data:
if x == val[0] and y == val[1]:
return val[2]
X, Y = np.meshgrid(x, y)
Z = [[f(x_, y_) for x_ in x] for y_ in y]
cp = plt.contourf(Y, X, Z)
plt.colorbar(cp)
plt.show()
Therefore, I think it is more correct to ask how to solve the problem gracefully, elegantly.
I found a way to significantly speed up the filling of Z. The solution, of course, is not elegant.
import numpy as np
import matplotlib.pyplot as plt
data = [[0, 0, 1], [0, 1, 1], [1, 0, 2], [1, 1, 3]]
x, y, _ = np.array(data).T.tolist()
x = list(set(x))
y = list(set(y))
X, Y = np.meshgrid(x, y)
#####
Z = np.zeros((len(x), len(y)))
x_ind = {xx[1] : xx[0] for xx in enumerate(x)}
y_ind = {yy[1] : yy[0] for yy in enumerate(y)}
for d in data:
xx, yy, zz = d
Z[x_ind[xx], y_ind[yy]] = zz
#####
cp = plt.contourf(Y, X, Z)
plt.colorbar(cp)
plt.show()
I compare these two approaches with a simple code:
import numpy as np
import sys
import time
def f1(data, x, y):
for val in data:
if x == val[0] and y == val[1]:
return val[2]
def init1(data, x, y):
Z = [[f1(data, x_, y_) for x_ in x] for y_ in y]
return Z
def init2(data, x, y):
Z = np.zeros((len(x), len(y)))
x_ind = {xx[1] : xx[0] for xx in enumerate(x)}
y_ind = {yy[1] : yy[0] for yy in enumerate(y)}
for d in data:
xx, yy, zz = d
Z[x_ind[xx], y_ind[yy]] = zz
return Z
def test(n):
data = []
x = y = [nn / n for nn in range(n+1)]
for xx in x:
for yy in y:
data.append([xx, yy, 0.])
t1 = time.time()
init1(data, x, y)
dt1 = time.time() - t1
t2 = time.time()
init2(data, x, y)
dt2 = time.time() - t2
print(f'n = {n:5d} ; t1 = {dt1:10.3f} s ; t2 = {dt2:10.3f} s')
def main():
n = 10
if len(sys.argv) > 1:
try:
n = int(sys.argv[1])
except:
print(f'Can not convert "{sys.argv[1]}" to integer')
exit(0)
if n <= 0:
print(f'n is negative or equal zero')
exit(0)
test(n)
if __name__ == '__main__':
main()
Without specifying the characteristics of the machine on which the program was run, I will only give the result of its work for n = 100 and n = 200:
$ python test.py 100 ; python test.py 200
n = 100 ; t1 = 1.092 s ; t2 = 0.001 s
n = 200 ; t1 = 19.312 s ; t2 = 0.005 s
Of course, this is still an inefficient way. So, for example, it will take 2 seconds for a 4000 by 4000 grid.
I want to note that the new method works acceptably on small and medium amounts of data, and the operating time of matplotlib is significantly longer. On large amounts of data, the problems are primarily related to matplotlib.
I think that, although the solution is not elegant, it solves the problem at an acceptable speed. To be honest, I'm not even sure that the result can be significantly accelerated.

Inputting values into a matrix

I'm trying to substitute the values into my matrix ( using these commands
J.subs({x:0.1, y: 0.1, z: -0.1})
f.subs({x:0.1, y: 0.1, z: -0.1}) ) but it isn't working. How can I fix this or go around this as the values aren't being substituted into my equations inside the matrices?
import sympy as sp
# Create Symbols
x = sp.Symbol('x')
y = sp.Symbol('y')
z = sp.Symbol('z')
# Input Equations
eq1 = 3*x - sp.cos(y*z) - 1/2
eq2 = x**2 -81*(y+0.1)**2 + sp.sin(z) + 1.06
eq3 = sp.exp(-x*y) + 20*z + (10*sp.pi - 3)/3
# Create a blank matrix that works with symbols
J = sp.Matrix([[0,0,0],[0,0,0],[0,0,0]])
# Create the function matrix
f = sp.Matrix([[eq1],[eq2],[eq3]])
# Create the Jacobian Matrix
J[0,0] = sp.diff(eq1,x)
J[1,0] = sp.diff(eq1,y)
J[2,0] = sp.diff(eq1,z)
J[0,1] = sp.diff(eq2,x)
J[1,1] = sp.diff(eq2,y)
J[2,1] = sp.diff(eq2,z)
J[0,2] = sp.diff(eq3,x)
J[1,2] = sp.diff(eq3,y)
J[2,2] = sp.diff(eq3,z)
# Create the inital guess matrix
p = sp.Array([[0.1], [0.1], [-0.1]])
# Substitute the values
J.subs({x:0.1, y: 0.1, z: -0.1})
f.subs({x:0.1, y: 0.1, z: -0.1})
print(J)
# Create the loop for 5 iterations
# Intialize counter
i = 0
# THe loop itself
#for i in range(0,4):
# p = p - A*f
# i += 1
# A.subs([(x, p[0,0]), (y, p[1,0]), (z, p[2,0])])
# f.subs([(x, p[0,0]), (y, p[1,0]), (z, p[2,0])])
#print(p)
Since subs returns a value after the substitutions have been made, you have to "catch" that updated value:
for i in range(0,4):
p = p - A*f
A = A.subs([(x, p[0,0]), (y, p[1,0]), (z, p[2,0])])
f = f.subs([(x, p[0,0]), (y, p[1,0]), (z, p[2,0])])
(And i is being controlled by the loop so you don't need to update i.)

sympy collect undefined functions

I have an expression in sympy that is a linear combination of an evaluated function, f. Schematically
expr = Sum_{m,n} c_{m,n} f(x+a_m,y+a_n)
where c_{m,n} is a coefficient depending on the variables x,y. A very simple example is
import sympy as sp
x, y = sp.symbols("x, y")
f = sp.Function("f")(x,y)
expr = 0
for i in range(0,3):
expr += (x-i)* f.subs({x: x+2*i, y: y+3*i})
In my actual code expr is the result of a long succession of sums and the function g not simplify like here. Is there an efficient way of grouping functions with different argument together, like collect(expr) does for polynomials? What I am after is to obtain a structured list:
In: someFunction(...)
Out: [..., [c_{m,n}, x+a_m, y+a_n ], ...]
in the example above
In: someFunction(expr)
Out: [[x, x, y], [x - 1, x + 2, y + 3], [x - 2, x + 4, y + 6]]
I'm not sure if this does exactly what you want but you can use pattern matching:
In [27]: expr
Out[27]: x⋅f(x, y) + (x - 2)⋅f(x + 4, y + 6) + (x - 1)⋅f(x + 2, y + 3)
In [28]: a, b, c = symbols('a, b, c', cls=Wild)
In [29]: pattern = a*f(b, c)
In [30]: for term in Add.make_args(expr):
...: print(term.match(pattern))
...:
{b_: x, c_: y, a_: x}
{b_: x + 2, c_: y + 3, a_: x - 1}
{b_: x + 4, c_: y + 6, a_: x - 2}

Creating a Running Total column in a matrix?

Related to this question I recently posted & resolved.
If the 3rd column of a matrix is to be changed to be the running total of the sums, how would I adjust my code to do so?
This is where I'm at so far:
def dice(n):
rolls = np.empty(shape=(n, 3),dtype=int)
for i in range(n):
x = random.randint(1,6)
y = random.randint(1,6)
if x in [1,3,5] or y in [1,3,5]:
sum_2_dice = x + y
z = np.cumsum(sum_2_dice)
rolls[i,:] = x, y, z
else:
sum_2_dice = -(x+y) # meaning you "lose the sum" basically
z = np.cumsum(sum_2_dice)
rolls[i,:] = x, y, z
return rolls `
So for example: dice(2)
returns
array[[2, 6, -8],
[1, 3, 4],
[5, 2, 7])
when it should really be returning:
array[[2, 6, -8],
[1, 3, -4],
[5, 2, 3])
I thought np.cumsum would be doing something, but I'm not sure. Is a while loop needed to do this (I'm not sure of where it'd be applied)? I've tried various adjustments like instead of having z = np.cumsum(sum_2_dice) I did sum_2_dice += sum_2_dice (consequently the code that followed it was rolls[i,:] = x, y, sum_2_dice but that was terribly wrong since all that ended up doing was doubling the sum values in every column, not doing any sort of running total calculations.
For your purposes, an easy way to keep track of z would be to initialise it outside of the loop, then keep adding the value of sum_2_dice.
def dice(n):
z = 0
rolls = np.empty(shape=(n, 3),dtype=int)
for i in range(n):
x = random.randint(1,6)
y = random.randint(1,6)
if x in [1,3,5] or y in [1,3,5]:
sum_2_dice = x + y
z += sum_2_dice
rolls[i,:] = x, y, z
else:
sum_2_dice = -(x+y) # meaning you "lose the sum" basically
z += sum_2_dice
rolls[i,:] = x, y, z
return rolls
print (dice(3))
#[[ 6 2 -8]
# [ 4 5 1]
# [ 1 5 7]]
For reference, numpy.cumsum is normally used to get the cumulative sum of the elements of arrays, for example:
test = np.arange(0,5,1)
print (test)
# [0 1 2 3 4]
print (np.cumsum(test))
# [0 1 3 6 10]

Numpy arange error with Lagrange Multiplier in Python

I try to use Lagrange multiplier to optimize a function, and I am trying to loop through the function to get a list of number, however I got the error
ValueError: setting an array element with a sequence.
Here is my code, where do I go wrong? If the n is not an array I can get the result correctly though
import numpy as np
from scipy.optimize import fsolve
n = np.arange(10000,100000,10000)
def func(X):
x = X[0]
y = X[1]
L = X[2]
return (x + y + L * (x**2 + y**2 - n))
def dfunc(X):
dLambda = np.zeros(len(X))
h = 1e-3
for i in range(len(X)):
dX = np.zeros(len(X))
dX[i] = h
dLambda[i] = (func(X+dX)-func(X-dX))/(2*h);
return dLambda
X1 = fsolve(dfunc, [1, 1, 0])
print (X1)
Helps would be appreciated, thank you very much
First, check func = fsolve()
Second, print(func([1,1,0]))` - result in not number ([2 2 2 2 2 2 2 2 2]), beause "n" is list. if you want to iterate n try:
import numpy as np
from scipy.optimize import fsolve
n = np.arange(10000,100000,10000)
def func(X,n):
x = X[0]
y = X[1]
L = X[2]
return (x + y + L * (x**2 + y**2 - n))
def dfunc(X,n):
dLambda = np.zeros(len(X))
h = 1e-3
r = 0
for i in range(len(X)):
dX = np.zeros(len(X))
dX[i] = h
dLambda[i] = (func(X+dX,n)-func(X-dX,n))/(2*h)
return dLambda
for iter_n in n:
print("for n = {0} dfunc = {1}".format(iter_n,dfunc([0.8,0.4,0.3],iter_n)))

Categories

Resources