Populating an Array in a loop not working - python

I am dumbfounded right now, I have some code that works generating an array of data and operating on it.
I am trying to sample random sections from my code, in order to check the calculations I am doing.
I have done this before and it has worked fine. I
target_sample =[1,2,10,25,83,62]
df, s_array_track ,z_array_track = MonteCarloValuationAntithetic(df,target_sample)
#df,z,s_array,lookback_scenario = MonteCarloValuation(df)
target_sample =[1,2,10,25,83,62]
lookback = []
for i in range(n_samples):
s = df["current_index"][i]
s_max = df["max_index"][i]
t = df["time to maturity_Months"][i]
sigma = df["volatility"][i]
cap = df["cap_rate"][i]
r = df["interest_rate"][i]
z = np.zeros((int(index_crediting_term*12)+1,n_scenarios))
s_array_track=np.zeros((len(target_sample),int(index_crediting_term*12)+1,n_scenarios))
z_array_track = np.zeros((len(target_sample),int(index_crediting_term*12)+1,n_scenarios))
df_track = df
s_start = df['initial_index'][i]
s_array = np.zeros((int(index_crediting_term*12)+1,n_scenarios))
for k in range(int(n_scenarios/2)):
for j in range(int(t)+1):
drift =( r - .5 *(sigma**2)) * (1/12)
z[j][k] = np.random.normal(0, 1)
diffusion = sigma* z[j][k] * (np.sqrt(1/12))
if j == 0:
s_array[j][k] = s
if (0 < j) and (j < t):
s_array[j][k] = s_array[j-1][k]*np.exp(drift + diffusion)
if j==t:
s_array[j][k] = s_max
else:
continue
for k in range(int(n_scenarios/2),int(n_scenarios)):
for j in range(int(t)+1):
drift =( r - .5 *(sigma**2)) * (1/12)
z[j][k] = -z[j][int(k-n_scenarios/2)]
diffusion = sigma* z[j][k] * (np.sqrt(1/12))
if j == 0:
s_array[j][k] = s
if (0 < j) and (j < t):
s_array[j][k] = s_array[j-1][k]*np.exp(drift + diffusion)
if j == t:
s_array[j][k] = s_max
else:
continue
if i in target_sample:
print(str(i) + " is in Target")
h = target_sample.index(i)
print(str(h))
s_array_track[h] = s_array
z_array_track[h] = z
lookback_temp = max(0,np.mean(np.clip(np.max(((s_array[:][:] / s_start)-1) ,axis =0 ),None,cap))))
lookback.append(lookback_temp)
df["Lookback"] = lookback
I am not getting the results I am expecting. When I do
s_array_track[h] = s_array
Outside of the code it works as expected. What is going on in my loop? I have spent hours on this and I am really confused as to why its not working.

Related

Speed up for loop in python

I have a for loop as follows:
import MDAnalysis as mda
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm as tq
import MDAnalysis.analysis.pca as pca
import random
def PCA_projection(pdb,dcd,atomgroup):
u = mda.Universe(pdb,dcd)
PSF_pca = pca.PCA(u, select=atomgroup)
PSF_pca.run(verbose=True)
n_pcs = np.where(PSF_pca.results.cumulated_variance > 0.95)[0][0]
atomgroup = u.select_atoms(atomgroup)
pca_space = PSF_pca.transform(atomgroup, n_components=n_pcs)
PC1_proj = [pca_space[i][0] for i in range(len(pca_space))]
PC2_proj = [pca_space[i][1] for i in range(len(pca_space))]
return PC1_proj, PC2_proj
def Read_bias_potential(bias_potential):
Bias_potential = pd.read_csv(bias_potential)
Bias_potential = Bias_potential['En-User']
Bias_potential = Bias_potential.values.tolist()
W = [math.exp((-1 * i) / (0.001987*300)) for i in Bias_potential]
return W
def Bin(PC1_prj, PC2_prj, frame_num, min_br1, max_br1, min_br2, max_br2, bin_num, W):
#import pdb;pdb.set_trace()
data1 = PC1_prj[0:frame_num]
bins1 = np.linspace(min_br1, max_br1, bin_num)
bins1 = np.round(bins1,2)
digitized1 = np.digitize(data1, bins1)
binc1 = np.arange(min_br1 + (max_br1 - min_br1)/2*bin_num,
max_br1 + (max_br1 - min_br1)/2*bin_num, (max_br1 - min_br1)/bin_num, dtype = float)
binc1 = np.around(binc1,3)
data2 = PC2_prj[0:frame_num]
bins2 = np.linspace(min_br2, max_br2, bin_num)
bins2 = np.round(bins2,2)
digitized2 = np.digitize(data2, bins2)
binc2 = np.arange(min_br2 + (max_br2 - min_br2)/2*bin_num, max_br2 + (max_br2 - min_br2)/2*bin_num, (max_br2 - min_br2)/bin_num, dtype = float)
binc2 = np.around(binc2,3)
w_array = np.zeros((bin_num,bin_num))
for j in range(frame_num):
w_array[digitized1[j]][digitized2[j]] += (W[digitized1[j]] + W[digitized2[j]])
for m in range(bin_num):
for n in range(bin_num):
if w_array[m][n] == 0:
w_array[m][n] = 1e-100
return w_array, binc1, binc2
def gaussian(Sj1,Slj1,Sj2,Slj2,count):
sigma1 = 0.5
sigma2 = 0.5
Kb = 0.001987204
T = 300
h0 = 0.0001
g = 0
C1 = 0
C2 = 0
for i in range((np.where(Slj2 == Sj2)[0][0] - 5),(np.where(Slj2 == Sj2)[0][0] + 6)):
if i < 0:
C2 = i + 1000
elif i > 999:
C2 = i - 1000
else:
C2 = i
for j in range((np.where(Slj1 == Sj1)[0][0] - 5),(np.where(Slj2 == Sj2)[0][0] + 6)):
if j < 0:
C1 = j + 1000
elif j > 999:
C1 = j -1000
else:
C1 = j
g = g + count[C2,C1] * h0 * np.exp( (-(Sj1 - Slj1[C1]) ** 2 / (2 * sigma1 ** 2)) + (-(Sj2 - Slj2[C2]) ** 2 / (2 * sigma2 ** 2)) )
return np.exp(-g / (Kb * T))
def resampling(binc1, binc2, w_array):
# import pdb;pdb.set_trace()
l =1000
F = np.zeros((l,l))
count = np.zeros((l,l))
Wn = w_array
for i in tq(range(10000000)):
SK1 = random.choice(binc1)
SK2 = random.choice(binc2)
SL1 = random.choice(binc1)
SL2 = random.choice(binc2)
while SK1 == SL1:
SL1 = random.choice(binc1)
while SK2 == SL2:
SL2 = random.choice(binc2)
F[np.where(binc2 == SK2)[0][0]][np.where(binc1 == SK1)[0][0]] = gaussian(SK1,binc1,SK2,binc2,count)
F[np.where(binc2 == SK2)[0][0]][np.where(binc1 == SK1)[0][0]] = gaussian(SL1,binc1,SL2,binc2,count)
W_SK = Wn[np.where(binc2 == SK2)[0][0]][np.where(binc1 == SK1)[0][0]] * F[np.where(binc2 == SK2)[0][0]][np.where(binc1 == SK1)[0][0]]
W_SL = Wn[np.where(binc2 == SL2)[0][0]][np.where(binc1 == SL1)[0][0]] * F[np.where(binc2 == SL2)[0][0]][np.where(binc1 == SL1)[0][0]]
if W_SK <= W_SL:
SK1 = SL1
SK2 = SL2
else:
a = random.random()
if W_SL/W_SK >= a:
SK1 = SL1
SK2 = SL2
else:
SK1 = SK1
SK2 = SK2
#print('SK =',SK)
count[np.where(binc2 == SK2)[0][0]][np.where(binc1 == SK1)[0][0]] += 1
return F
where binc1 and binc2 are two np.arrays, gaussian is a gaussian fxn I defined, is there anyway I can speed up this for loop? Now 1000000 steps takes approximately 50 mins. I am thinking about using pytorch but I got no idea on how to do it. Any suggestions would be helpful!
Thanks
I tried to use pytorch, like put all the variables on gpu but it only does worse.

how do i leave the list with only 1 person

The problem:
https://www.beecrowd.com.br/judge/problems/view/1032
I can't get the numbers out of the people list, this method I did gives the error "pop index out of range".
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# PRIME LIST
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
from audioop import reverse
numerosPrimos = 3501
primos = []
for i in range(2, numerosPrimos + 1):
for j in range(2, int(i ** 0.5) + 1):
if i % j == 0:
break
else:
primos.append(i)
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# PEOPLE LIST
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
numeroPessoas = int(input())
pessoas = []
for l in range(1, numeroPessoas + 1):
pessoas.append(l)
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# REMOVING PEOPLE
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# pessoasEliminada = pessoas
p = primos[0]
pessoas.sort(reverse=True)
mult = 1
while len(pessoas) >= 1 and p <= 3501:
pessoas.pop(len(pessoas) - p)
print(pessoas)
p = p + p

Why is python being slow? And how can I make it faster?

import numpy as np
import random
import matplotlib.pyplot as plt
# set grid size, M*N (row, col)
M: int = 5
N: int = 5
def moves(pos: tuple, dpos: tuple) -> tuple:
return (pos[0] + dpos[0], pos[1] + dpos[1])
def check_neighbors(white_pos: tuple, black_pos: tuple) -> bool:
stationary = white_pos
up = (white_pos[0], white_pos[1] + 1)
upper_right = (white_pos[0] + 1, white_pos[1] + 1)
upper_left = (white_pos[0] - 1, white_pos[1] + 1)
left = (white_pos[0] - 1, white_pos[1])
right = (white_pos[0] + 1, white_pos[1])
lower_left = (white_pos[0] - 1, white_pos[1] - 1)
down = (white_pos[0], white_pos[1] - 1)
lower_right = (white_pos[0] + 1, white_pos[1] - 1)
if (black_pos == stationary) or (black_pos == up) or (black_pos == upper_right) or (black_pos == upper_left) or (black_pos == left) or (black_pos == right) or (black_pos == lower_left) or (black_pos == down) or (black_pos == lower_right):
return True
else:
return False
def run_sim():
w_x0 = random.sample([i for i in range(0,M)], 1)
w_y0 = random.sample([j for j in range(0,N)], 1)
b_x0 = random.sample([i for i in range(0,M)], 1)
b_y0 = random.sample([j for j in range(0,N)], 1)
white = [(x,y) for x, y in zip(w_x0, w_y0)]
black = [(x,y) for x, y in zip(b_x0, b_y0)]
stop: bool = False
n: int = 0
t: int = 0
while stop != True:
if check_neighbors(white[n], black[n]) == True:
stop = True
else:
dt_w = random.sample([i for i in range(-1,2)], 2)
dt_bl = random.sample([i for i in range(-1,2)], 2)
white.append(moves(white[n], dt_w))
black.append(moves(black[n], dt_bl))
t = t+1
n = n+1
return n
t_dist = [run_sim() for i in range(100)]
print(t_dist)
excuse the terrible formatting
when it gets to t_dist = [run_sim() for i in range(100)] it takes FOREVER to run (I mean like wayy over 60 seconds). How can I get it to run faster and get my results? Why is it so computationally expensive?
I am using a jupyter notebook. I also tried just plain running it as a .py file and it is still slow. I tried using the debugger and after setting a breakpoint at t_dist = , it only shows a few iterations and then stops. If I set it to range(5), it works just fine but starts spazzing with bigger numbers (i.e. 100, 10000), which is what I want to run the simulation as.
If you want to know only the distribution, you should define max_t and please adjust the value.
def check_neighbors(a,b):
return abs(a[0]-b[0]) + abs(a[1]-b[1]) <= 1 or\
(abs(a[0]-b[0])==1 and abs(a[1]-b[1])==1)
def moves(a,b):
return (a[0]+b[0], a[1]+b[1])
def run_sim(M=5, N=5, max_t=10**6):
w_co = [random.randrange(0,M), random.randrange(0,N)]# to produce random integer, you can use randrange
b_co = [random.randrange(0,M), random.randrange(0,N)]
t: int = 0
while t<max_t:
if check_neighbors(w_co, b_co) is True: # for bool, `is True` is strict
break
else:
dt_w = [random.randrange(-1,2) for _ in range(2)]
dt_b = [random.randrange(-1,2) for _ in range(2)]
w_co=moves(w_co, dt_w)
b_co=moves(b_co, dt_b)
t+=1
if (t%(max_t//20)==0):
print(f"\t{t}") # check if the program is processing or not
return t
t_dist = []
for i in range(100):
if (i%10==1):
print(i) # check the process
t_dist.append(run_sim(M=5, N=5, max_t=10**6))
print(t_dist)
Additionally, this is a histogram of exapmle.
The graph shows some cases take very, very long time.

Trust-Region Dogleg Method for Nonlinear Equations

Hi I am trying to write a trust-region algorithm using the dogleg method with python for a class I have. I have a Newton's Method algorithm and Broyden's Method algorthm that agree with each other but I can't seem to get this Dogleg method to work.
Here is the function I am trying to find the solution to:
def test_function(x):
x1 = float(x[0])
x2 = float(x[1])
r = np.array([[x2**2 - 1],
[np.sin(x1) - x2]])
return r
and here is the jacobian I wrote
def Test_Jacobian(x, size):
e = create_ID_vec(size)
#print(e[0])
epsilon = 10e-8
J = np.zeros([size,size])
#print (J)
for i in range(0, size):
for j in range(0, size):
J[i][j] = ((test_function(x[i]*e[j] + epsilon*e[j])[i] - test_function(x[i]*e[j])[i])/epsilon)
return J
and here is my Trust-Region algorithm:
def Trust_Region(x):
trust_radius = 1
max_trust = 300
eta = rand.uniform(0,.25)
r = test_function(x) # change to correspond with the function you want
J = Test_Jacobian(r, r.size) # change to correspond with function
i = 0
iteration_table = [i]
function_table = [vector_norm(r, r.size)]
while vector_norm(r, r.size) > 10e-10:
print(x, 'at iteration', i, "norm of r is", vector_norm(r, r.size))
p = dogleg(x, r, J, trust_radius)
rho = ratio(x, J, p)
if rho < 0.25:
print('first')
trust_radius = 0.25*vector_norm(p,p.size)
elif rho > 0.75 and vector_norm(p,p.size) == trust_radius:
print('second')
trust_radius = min(2*trust_radius, max_trust)
else:
print('third')
trust_radius = trust_radius
if rho > eta:
print('x changed')
x = x + p
#r = test_function(x)
#J = Test_Jacobian(r, r.size)
else:
print('x did not change')
x = x
r = test_function(x) # change to correspond with the function you want
J = Test_Jacobian(r, r.size) # change to correspond with function
i = i + 1
#print(r)
#print(J)
#print(vector_norm(p,p.size))
print(rho)
#print(trust_radius)
iteration_table.append(i)
function_table.append(vector_norm(r,r.size))
print ('The solution to the non-linear equation is: ', x)
print ('This solution was obtained in ', i, 'iteratations')
plt.figure(figsize=(10,10))
plt.plot(iteration_table, np.log10(function_table))
plt.xlabel('iteration number')
plt.ylabel('function value')
plt.title('Semi-Log Plot for Convergence')
return x, iteration_table, function_table
def dogleg(x, r, J, trust_radius):
tau_k = min(1, vector_norm(J.transpose().dot(r), r.size)**3/(trust_radius*r.transpose().dot(J).dot(J.transpose().dot(J)).dot(J.transpose()).dot(r)))
p_c = -tau_k*(trust_radius/vector_norm(J.transpose().dot(r), r.size))*J.transpose().dot(r)
if vector_norm(p_c, p_c.size) == trust_radius:
print('using p_c')
p_k = p_c
else:
p_j = -np.linalg.inv(J.transpose().dot(J)).dot(J.transpose().dot(r))
print ('using p_j')
tau = tau_finder(x, p_c, p_j, trust_radius, r.size)
p_k = p_c + tau*(p_j-p_c)
return p_k
def ratio(x, J, p):
r = test_function(x)
r_p = test_function(x + p)
print (vector_norm(r, r.size)**2)
print (vector_norm(r_p, r_p.size)**2)
print (vector_norm(r + J.dot(p), r.size)**2)
rho_k =(vector_norm(r, r.size)**2 - vector_norm(r_p, r_p.size)**2)/(vector_norm(r, r.size)**2 - vector_norm(r + J.dot(p), r.size)**2)
return rho_k
def tau_finder(x, p_c, p_j, trust_radius, size):
a = 0
b = 0
c = 0
for i in range(0, size):
a = a + (p_j[i] - p_c[i])**2
b = b + 2*(p_j[i] - p_c[i])*(p_c[i] - x[i])
c = (p_c[i] - x[i])**2
c = c - trust_radius**2
tau_p = (-b + np.sqrt(b**2 - 4*a*c))/(2*a)
tau_m = (-b - np.sqrt(b**2 - 4*a*c))/(2*a)
#print(tau_p)
#print(tau_m)
if tau_p <= 1 and tau_p >=0:
return tau_p
elif tau_m <= 1 and tau_m >=0:
return tau_m
else:
print('error')
return 'error'
def model_function(p):
r = test_function(x)
J = Test_Jacobian(r, r.size)
return 0.5*vector_norm(r + J.dot(p), r.size)**2
The answer should be about [[1.57076525], [1. ]]
but here is the output after about 28-30 iterations:
ZeroDivisionError Traceback (most recent call last)
<ipython-input-359-a414711a1671> in <module>
1 x = create_point(2,1)
----> 2 Trust_Region(x)
<ipython-input-358-7cb77bd44d7b> in Trust_Region(x)
11 print(x, 'at iteration', i, "norm of r is", vector_norm(r, r.size))
12 p = dogleg(x, r, J, trust_radius)
---> 13 rho = ratio(x, J, p)
14
15 if rho < 0.25:
<ipython-input-358-7cb77bd44d7b> in ratio(x, J, p)
71 print (vector_norm(r_p, r_p.size)**2)
72 print (vector_norm(r + J.dot(p), r.size)**2)
---> 73 rho_k =(vector_norm(r, r.size)**2 - vector_norm(r_p, r_p.size)**2)/(vector_norm(r, r.size)**2 - vector_norm(r + J.dot(p), r.size)**2)
74 return rho_k
75
ZeroDivisionError: float division by zero

Cannot Scatter, Plot, Show() In While Loop

import math
import pylab as plt
import numpy
from numpy import sqrt
from scipy.integrate import quad
import random
numpy.seterr(divide='ignore', invalid='ignore')
def integrand (x):
return sqrt(1-x**2)
q1area, err = quad(integrand,0,1)
print "This program estimates the convergence of Pi to a ratio of one."
while True:
print "Please choose from one of the five following options:"
print " 1. 10^1\n 2. 10^2\n 3. 10^3\n"
choice = int(raw_input())
options = {1,2,3}
if choice == 1:
plt.xlim([0,15])
plt.ylim([-5,5])
x = numpy.linspace(0,15,500)
y = numpy.sqrt(1-x**2)
z = 1+x*0
xcord = []
ycord = []
under = []
above = []
pratiolist = []
yvalues = []
xvalues = range(1,11)
for i in range(10):
xcord.append(random.random())
ycord.append(random.random())
for j in ycord:
if (j <= q1area):
under.append(1)
else:
above.append(1)
punder = len(under)
if punder == 0:
punder = punder + 1
pabove = len(above)
if pabove == 0:
pabove = pabove + 1
pratio = punder / float(pabove)
pratiolist.append(pratio)
for k in pratiolist:
rtpi = k / float(math.pi)
yvalues.append(rtpi)
plt.scatter(xvalues,yvalues,c='b')
plt.plot(x,z,'g')
plt.show()
if choice == 2:
plt.xlim([0,110])
plt.ylim([-5,5])
x = numpy.linspace(0,110,500)
y = numpy.sqrt(1-x**2)
z = 1+x*0
xcord = []
ycord = []
under = []
above = []
pratiolist = []
yvalues = []
xvalues = range(1,101)
for i in range(100):
xcord.append(random.random())
ycord.append(random.random())
for j in ycord:
if (j <= q1area):
under.append(1)
else:
above.append(1)
punder = len(under)
if punder == 0:
punder = punder + 1
pabove = len(above)
if pabove == 0:
pabove = pabove + 1
pratio = punder / float(pabove)
pratiolist.append(pratio)
for k in pratiolist:
rtpi = k / float(math.pi)
yvalues.append(rtpi)
plt.scatter(xvalues,yvalues,c='b')
plt.plot(x,z,'g')
plt.show()
if choice == 3:
plt.xlim([0,1100])
plt.ylim([-5,5])
x = numpy.linspace(0,1100,500)
y = numpy.sqrt(1-x**2)
z = 1+x*0
xcord = []
ycord = []
under = []
above = []
pratiolist = []
yvalues = []
xvalues = range(1,1001)
for i in range(1000):
xcord.append(random.random())
ycord.append(random.random())
for j in ycord:
if (j <= q1area):
under.append(1)
else:
above.append(1)
punder = len(under)
if punder == 0:
punder = punder + 1
pabove = len(above)
if pabove == 0:
pabove = pabove + 1
pratio = punder / float(pabove)
pratiolist.append(pratio)
for k in pratiolist:
rtpi = k / float(math.pi)
yvalues.append(rtpi)
plt.scatter(xvalues,yvalues,c='b')
plt.plot(x,z,'g')
plt.show()
while choice not in options:
print "Not a valid choice!\n"
break
#plt.scatter(xvalues,yvalues,c='b')
#plt.plot(x,z,'g')
#plt.show()
The only way I can get the graphs to show is if I place break statements at the end of every if choice == 1,2,3, etc. and then place:
plt.scatter(xvalues,yvalues,c='b')
plt.plot(x,z,'g')
plt.show()
At the bottom of my code. This is inconvenient, I would like my to loop endlessly allowing choice between 1,2,3 without having to rerun the program. Why does Python's graphs crash when they are in whiles?
UPDATE
By using plt.draw(), I was able to get the graph to at least show but it still is not responding.
If by not responding you mean it doesn't show the prompt again this is because plt.show() will cause the program to stop until the window is closed. You can replace the plt.show()'s with plt.draw(), but to actually have windows come up you need to be in interactive mode. This is accomplished by calling plt.ion() sometime before any of the draw calls (I put it before the while True:). I've tested it an this should accomplish the behavior you're looking for.
Edit: Since you aren't redrawing the same data, calling draw() will append the data to the specific plot (i.e. typing 1 in over and over will keep adding points). I don't know what type of behavior you're looking for but you may want to call plt.clf before each scatter call if you want to clear the figure.

Categories

Resources