Why is python being slow? And how can I make it faster? - python

import numpy as np
import random
import matplotlib.pyplot as plt
# set grid size, M*N (row, col)
M: int = 5
N: int = 5
def moves(pos: tuple, dpos: tuple) -> tuple:
return (pos[0] + dpos[0], pos[1] + dpos[1])
def check_neighbors(white_pos: tuple, black_pos: tuple) -> bool:
stationary = white_pos
up = (white_pos[0], white_pos[1] + 1)
upper_right = (white_pos[0] + 1, white_pos[1] + 1)
upper_left = (white_pos[0] - 1, white_pos[1] + 1)
left = (white_pos[0] - 1, white_pos[1])
right = (white_pos[0] + 1, white_pos[1])
lower_left = (white_pos[0] - 1, white_pos[1] - 1)
down = (white_pos[0], white_pos[1] - 1)
lower_right = (white_pos[0] + 1, white_pos[1] - 1)
if (black_pos == stationary) or (black_pos == up) or (black_pos == upper_right) or (black_pos == upper_left) or (black_pos == left) or (black_pos == right) or (black_pos == lower_left) or (black_pos == down) or (black_pos == lower_right):
return True
else:
return False
def run_sim():
w_x0 = random.sample([i for i in range(0,M)], 1)
w_y0 = random.sample([j for j in range(0,N)], 1)
b_x0 = random.sample([i for i in range(0,M)], 1)
b_y0 = random.sample([j for j in range(0,N)], 1)
white = [(x,y) for x, y in zip(w_x0, w_y0)]
black = [(x,y) for x, y in zip(b_x0, b_y0)]
stop: bool = False
n: int = 0
t: int = 0
while stop != True:
if check_neighbors(white[n], black[n]) == True:
stop = True
else:
dt_w = random.sample([i for i in range(-1,2)], 2)
dt_bl = random.sample([i for i in range(-1,2)], 2)
white.append(moves(white[n], dt_w))
black.append(moves(black[n], dt_bl))
t = t+1
n = n+1
return n
t_dist = [run_sim() for i in range(100)]
print(t_dist)
excuse the terrible formatting
when it gets to t_dist = [run_sim() for i in range(100)] it takes FOREVER to run (I mean like wayy over 60 seconds). How can I get it to run faster and get my results? Why is it so computationally expensive?
I am using a jupyter notebook. I also tried just plain running it as a .py file and it is still slow. I tried using the debugger and after setting a breakpoint at t_dist = , it only shows a few iterations and then stops. If I set it to range(5), it works just fine but starts spazzing with bigger numbers (i.e. 100, 10000), which is what I want to run the simulation as.

If you want to know only the distribution, you should define max_t and please adjust the value.
def check_neighbors(a,b):
return abs(a[0]-b[0]) + abs(a[1]-b[1]) <= 1 or\
(abs(a[0]-b[0])==1 and abs(a[1]-b[1])==1)
def moves(a,b):
return (a[0]+b[0], a[1]+b[1])
def run_sim(M=5, N=5, max_t=10**6):
w_co = [random.randrange(0,M), random.randrange(0,N)]# to produce random integer, you can use randrange
b_co = [random.randrange(0,M), random.randrange(0,N)]
t: int = 0
while t<max_t:
if check_neighbors(w_co, b_co) is True: # for bool, `is True` is strict
break
else:
dt_w = [random.randrange(-1,2) for _ in range(2)]
dt_b = [random.randrange(-1,2) for _ in range(2)]
w_co=moves(w_co, dt_w)
b_co=moves(b_co, dt_b)
t+=1
if (t%(max_t//20)==0):
print(f"\t{t}") # check if the program is processing or not
return t
t_dist = []
for i in range(100):
if (i%10==1):
print(i) # check the process
t_dist.append(run_sim(M=5, N=5, max_t=10**6))
print(t_dist)
Additionally, this is a histogram of exapmle.
The graph shows some cases take very, very long time.

Related

how can I run this code with two loops faster? Can I run it without using for?

I wanna run this code for a wide range instead of this range. So I wanna make it better to run faster.
Is it impossible to use something else instead of these loops?
z1=3
z2=HEIGHT-1
def myfunction(z1,z2):
for l in range(z1):
vector = np.zeros(WIDTH)
vector[WIDTH//2] = 1
result = []
result.append(vector)
for i in range(z2):
vector = doPercolationStep(vector, PROP, i)
result.append(vector)
result = np.array(result)
ss = result.astype(int)
ss = np.where(ss==0, -1, ss)
ww = (ss+(ss.T))/2
re_size = ww/(np.sqrt(L))
matr5 = re_size
np.savetxt('F:/folder/matr5/'+str(l)+'.csv', matr5)
and doPercolationStep is:
WIDTH = 5
HEIGHT = 5
L=5
PROP = 0.6447
def doPercolationStep(vector, PROP, time):
even = time%2 # even is 1 or 0
vector_copy = np.copy(vector)
WIDTH = len(vector)
for i in range(even, WIDTH, 2):
if vector[i] == 1:
pro1 = random.random()
pro2 = random.random()
if pro1 < PROP:
vector_copy[(i+WIDTH-1)%WIDTH] = 1 # left neighbour of i
if pro2 < PROP:
vector_copy[(i+1)%WIDTH] = 1 # right neighbour of i
vector_copy[i] = 0
return vector_copy

Populating an Array in a loop not working

I am dumbfounded right now, I have some code that works generating an array of data and operating on it.
I am trying to sample random sections from my code, in order to check the calculations I am doing.
I have done this before and it has worked fine. I
target_sample =[1,2,10,25,83,62]
df, s_array_track ,z_array_track = MonteCarloValuationAntithetic(df,target_sample)
#df,z,s_array,lookback_scenario = MonteCarloValuation(df)
target_sample =[1,2,10,25,83,62]
lookback = []
for i in range(n_samples):
s = df["current_index"][i]
s_max = df["max_index"][i]
t = df["time to maturity_Months"][i]
sigma = df["volatility"][i]
cap = df["cap_rate"][i]
r = df["interest_rate"][i]
z = np.zeros((int(index_crediting_term*12)+1,n_scenarios))
s_array_track=np.zeros((len(target_sample),int(index_crediting_term*12)+1,n_scenarios))
z_array_track = np.zeros((len(target_sample),int(index_crediting_term*12)+1,n_scenarios))
df_track = df
s_start = df['initial_index'][i]
s_array = np.zeros((int(index_crediting_term*12)+1,n_scenarios))
for k in range(int(n_scenarios/2)):
for j in range(int(t)+1):
drift =( r - .5 *(sigma**2)) * (1/12)
z[j][k] = np.random.normal(0, 1)
diffusion = sigma* z[j][k] * (np.sqrt(1/12))
if j == 0:
s_array[j][k] = s
if (0 < j) and (j < t):
s_array[j][k] = s_array[j-1][k]*np.exp(drift + diffusion)
if j==t:
s_array[j][k] = s_max
else:
continue
for k in range(int(n_scenarios/2),int(n_scenarios)):
for j in range(int(t)+1):
drift =( r - .5 *(sigma**2)) * (1/12)
z[j][k] = -z[j][int(k-n_scenarios/2)]
diffusion = sigma* z[j][k] * (np.sqrt(1/12))
if j == 0:
s_array[j][k] = s
if (0 < j) and (j < t):
s_array[j][k] = s_array[j-1][k]*np.exp(drift + diffusion)
if j == t:
s_array[j][k] = s_max
else:
continue
if i in target_sample:
print(str(i) + " is in Target")
h = target_sample.index(i)
print(str(h))
s_array_track[h] = s_array
z_array_track[h] = z
lookback_temp = max(0,np.mean(np.clip(np.max(((s_array[:][:] / s_start)-1) ,axis =0 ),None,cap))))
lookback.append(lookback_temp)
df["Lookback"] = lookback
I am not getting the results I am expecting. When I do
s_array_track[h] = s_array
Outside of the code it works as expected. What is going on in my loop? I have spent hours on this and I am really confused as to why its not working.

How to use more than 1 cpu core with this code?

I have this code, to convert .VCF files to .GENO files. To test it, I just used the smallest file on my laptop, but for all, I need a bigger machine, and it would be really nice, if it wouldn't take weeks to run. It works perfectly on my laptop (I7 4th gen), but really slow on our workstation server (have 48 cores, but slower). How can I modify the code, to use more cores? Thank you in advance.
The code:
import allel
import pandas as pd
import numpy as np
from time import process_time
import numba as nb
#nb.jit(forceobj=True)
def create_chrpos(data, n):
chr_pos = []
chr_pos = np.array(chr_pos, dtype=np.int32)
for i in range(len(data)):
if data['chr'][i] == n:
if i == 0:
chr_pos = data['pos'][0]
else:
a = data['pos'][i]
chr_pos = np.append(chr_pos, [a])
return chr_pos
#nb.njit
def create_needed_pos(chr_pos, pos):
needed_pos = nb.typed.List.empty_list(nb.int32)
for i in range(len(chr_pos)):
for k in range(len(pos)):
if chr_pos[i] == pos[k]:
if i == k == 1:
needed_pos = nb.typed.List([pos[k]])
else:
needed_pos.append(pos[k])
return needed_pos
#nb.njit
def create_needed_index(chr_pos, pos):
needed_index = nb.typed.List.empty_list(nb.int32)
for i in range(len(chr_pos)):
for k in range(len(pos)):
if chr_pos[i] == pos[k]:
if i == k == 1:
needed_index = nb.typed.List([pos[k]])
else:
needed_index.append(pos[k])
return needed_index
#nb.njit
def create_mat(geno):
# create matrix as np.uint8 (1 byte) instead of list of python integers (8 byte)
# also no need to dynamically resize / increase list size
geno_mat = np.zeros((len(geno[:, 0]), len(geno[1, :])), dtype=np.uint8)
for i in np.arange(len(geno[:, 0])):
for k in np.arange(len(geno[1, :])):
g = geno[i, k]
# nested ifs to avoid duplicate comparisons
if g[0] == 0:
if g[1] == 0:
geno_mat[i, k] = 2
elif g[1] == 1:
geno_mat[i, k] = 1
else:
geno_mat[i, k] = 9
elif g[0] == 1:
if g[1] == 0:
geno_mat[i, k] = 1
elif g[1] == 1:
geno_mat[i, k] = 0
else:
geno_mat[i, k] = 9
else:
geno_mat[i, k] = 9
return geno_mat
def genotyping(geno, pos, chr_pos):
needed_pos = create_needed_pos(chr_pos, pos)
create_needed_index(chr_pos, pos)
mat = create_mat(geno)
list_difference = [item for item in chr_pos if item not in needed_pos]
needed_pos_list = list(needed_pos)
matrix_df = pd.DataFrame(mat, dtype=int, index=pos)
filtered_geno_dataframe = matrix_df.loc[needed_pos_list, :]
missing_positions_df = pd.DataFrame(index=list_difference, columns=np.arange(2054))
missing_positions_df.fillna(2, inplace=True)
finaldataframe = pd.concat([filtered_geno_dataframe, missing_positions_df])
finaldataframe.sort_index(axis=0, inplace=True)
final_mat = finaldataframe.to_numpy(dtype=np.int32)
return final_mat
def write_first_chr(genotype):
with open('test_1.geno', 'wb') as fout: # Note 'wb' instead of 'w'
np.savetxt(fout, genotype, delimiter="", fmt='%d')
fout.seek(-2, 2)
fout.truncate()
def write_remaining_chr(genotype):
with open('test_1.geno', 'a') as fout: # Note 'wb' instead of 'w'
np.savetxt(fout, genotype, delimiter="", fmt='%d')
fout.seek(-2, 2)
fout.truncate()
if __name__ == "__main__":
t1_start = process_time()
data = pd.read_csv('REICH_1KG.snp', delimiter=r"\s+")
data.columns = ['ID', "chr", "pyspos", "pos", "Ref", "Alt"]
samples = open("sample_list_test.txt")
for i, line in enumerate(samples):
strip_line = line.strip()
n = i + 1
chr_pos = create_chrpos(data, n)
geno = allel.read_vcf(strip_line, fields=("calldata/GT",))["calldata/GT"]
pos = allel.read_vcf(strip_line, fields=("variants/POS",))["variants/POS"]
genotype = genotyping(geno, pos, chr_pos)
if i + 1 == 1:
print("First chromosome done")
write_first_chr(genotype)
else:
write_remaining_chr(genotype)
print("Done:Chr number:", n)
print("Finished genotyping")
t1_stop = process_time()
print("Ennyi idő kellett teszt1:", t1_stop - t1_start)

How to give each Category a color?

We have a code to draw circles on the Location on the map with the name of each category. Now the circles and text are one color. How do we get them in different color's by category? Example: Category Garden: Blue, Category Stone: Grey.
So far the code:
size(1500,800)
background(1)
nofill()
stroke('#f91')
pen(.2)
fill('#f91', 0.05)
rotate(90)
font("Avenir", "bold", 10)
align('left')
def mapValue(value, fromMin, fromMax, toMin, toMax):
# Figure out how 'wide' each range is
fromSpan = fromMax - fromMin
toSpan = toMax - toMin
# Convert the from range into a 0-1 range (float)
valueScaled = float(value - fromMin) / float(fromSpan)
# Convert the 0-1 range into a value in the to range.
return toMin + (valueScaled * toSpan)
def xOfDot(lon):
return mapValue(lon, -100, 100, 0, WIDTH)
def yOfDot(lat):
return mapValue(lat, -90, 90, HEIGHT, 0)
with open('theft-alerts.json', 'r') as inputFile:
data = json.load(inputFile)
print len(data)
artworksPerCity = {}
for stolenArt in data:
if stolenArt.has_key('Category'):
city = stolenArt['Category']
if stolenArt.has_key('nItemsStolen'):
numbersStolen = int(stolenArt['nItemsStolen'])
if artworksPerCity.has_key(city):
# Adjust the value stored for this city
artworksPerCity[city] = artworksPerCity[city] + numbersStolen
else:
# Create new key with new value
artworksPerCity[city] = numbersStolen
# Draw circle on the map
radius = artworksPerCity[city] /2
x = xOfDot(stolenArt['Lon'])
y = yOfDot(stolenArt['Lat'])
arc(x, y, radius)
text(city, x, y)
print artworksPerCity
Here is a sketch of what I intend to include in my pure python data utility.
def hexidecimalDiget(n,deHex = false):
if(n<0):
print "negitive values not supported by call to hexidecimalDiget("+str(n)+")"
return None
elif(n < 10):
return str(n)
elif(n < 15):
return ["a","b","c","d","e"][n-10]
elif(n in ["a","b","c","d","e"]):
if deHex:
return ["a","b","c","d","e"].index(n)
return n
else:
print "call to hexidecimalDiget("+str(n)+") not supported!"
return None
def colorFormHexArray(arr):
if len(arr)!=3 and len(arr)!=6:
print "invalid length for color on call to colorFormHexArray("+str(arr)+")"
return None
elif None in arr:
print "cannot make color from None arguments in "+str(arr)
return None
else:
ret = "#"
for k in arr:
if(type(k) == list):
for k2 in k:
ret+=hexidecimalDiget(k)
else:
ret+=hexidecimalDiget(k)
return ret
def arrayFromColor(c):
c = c.replace("#","")
col = []
for n,k in enumerate(c):
if(len(c) == 3):
col.append([hexidecimalDiget(k,deHex = True)])
elif(len(c) == 6):
col.append([hexidecimalDiget(c[(n+1)*2-2],deHex = True),hexidecimalDiget(c[(n+1)*2-2],deHex = True)])
return(col)
def intFromHexPair(hp):
ret = 0
for n,k in enumerate(hp):
digBase = 16**(len(hp)-n-1)
ret+=digBase*hexidecimalDiget(hp[0],deHex = True)
return ret
def hexPairFromInt(I,minDigits = 1,maxDigits = 256):
if I<0:
print "negitive numbers not supported by hexPairFromInt"
k= 0
while(16**(k+1) <= I):
k+=1
if k < minDigits:
k = minDigits
if k > minDigits:
print("maxDigitsExceeded")
ret = []
while k>=0
dig = 16**k
ret.append(hexidecimalDiget(int(I)%(dig))
I -= dig
k-=1
return ret
def specColor(start,end,bottom,top):
start = arrayFromColor(start)
end = arrayFromColor(end)
def ret(v):
if( v<start or c>end ):
print("value out of range "+str([start,end]))
return('#aa0000') #eyo <- error red
else:
starts = [intFromHexPair(k) for k in start]
ends = [intFromHexPair(hp) for k in end]
normalized = (v-bottom)/(top-bottom)
return colorFormHexArray([hexPairFromInt(int((starts[n]-ends[n])*normalized),minDigits = 1,maxDigits = 256) for n,k in enumerate(starts)])
return ret
This seems excessive and hasn't even been slightly tested yet (just a stetch up atm) but I'll be testing and incorporating this code here tonight :: http://krewn.github.io/KPlot/

Cannot Scatter, Plot, Show() In While Loop

import math
import pylab as plt
import numpy
from numpy import sqrt
from scipy.integrate import quad
import random
numpy.seterr(divide='ignore', invalid='ignore')
def integrand (x):
return sqrt(1-x**2)
q1area, err = quad(integrand,0,1)
print "This program estimates the convergence of Pi to a ratio of one."
while True:
print "Please choose from one of the five following options:"
print " 1. 10^1\n 2. 10^2\n 3. 10^3\n"
choice = int(raw_input())
options = {1,2,3}
if choice == 1:
plt.xlim([0,15])
plt.ylim([-5,5])
x = numpy.linspace(0,15,500)
y = numpy.sqrt(1-x**2)
z = 1+x*0
xcord = []
ycord = []
under = []
above = []
pratiolist = []
yvalues = []
xvalues = range(1,11)
for i in range(10):
xcord.append(random.random())
ycord.append(random.random())
for j in ycord:
if (j <= q1area):
under.append(1)
else:
above.append(1)
punder = len(under)
if punder == 0:
punder = punder + 1
pabove = len(above)
if pabove == 0:
pabove = pabove + 1
pratio = punder / float(pabove)
pratiolist.append(pratio)
for k in pratiolist:
rtpi = k / float(math.pi)
yvalues.append(rtpi)
plt.scatter(xvalues,yvalues,c='b')
plt.plot(x,z,'g')
plt.show()
if choice == 2:
plt.xlim([0,110])
plt.ylim([-5,5])
x = numpy.linspace(0,110,500)
y = numpy.sqrt(1-x**2)
z = 1+x*0
xcord = []
ycord = []
under = []
above = []
pratiolist = []
yvalues = []
xvalues = range(1,101)
for i in range(100):
xcord.append(random.random())
ycord.append(random.random())
for j in ycord:
if (j <= q1area):
under.append(1)
else:
above.append(1)
punder = len(under)
if punder == 0:
punder = punder + 1
pabove = len(above)
if pabove == 0:
pabove = pabove + 1
pratio = punder / float(pabove)
pratiolist.append(pratio)
for k in pratiolist:
rtpi = k / float(math.pi)
yvalues.append(rtpi)
plt.scatter(xvalues,yvalues,c='b')
plt.plot(x,z,'g')
plt.show()
if choice == 3:
plt.xlim([0,1100])
plt.ylim([-5,5])
x = numpy.linspace(0,1100,500)
y = numpy.sqrt(1-x**2)
z = 1+x*0
xcord = []
ycord = []
under = []
above = []
pratiolist = []
yvalues = []
xvalues = range(1,1001)
for i in range(1000):
xcord.append(random.random())
ycord.append(random.random())
for j in ycord:
if (j <= q1area):
under.append(1)
else:
above.append(1)
punder = len(under)
if punder == 0:
punder = punder + 1
pabove = len(above)
if pabove == 0:
pabove = pabove + 1
pratio = punder / float(pabove)
pratiolist.append(pratio)
for k in pratiolist:
rtpi = k / float(math.pi)
yvalues.append(rtpi)
plt.scatter(xvalues,yvalues,c='b')
plt.plot(x,z,'g')
plt.show()
while choice not in options:
print "Not a valid choice!\n"
break
#plt.scatter(xvalues,yvalues,c='b')
#plt.plot(x,z,'g')
#plt.show()
The only way I can get the graphs to show is if I place break statements at the end of every if choice == 1,2,3, etc. and then place:
plt.scatter(xvalues,yvalues,c='b')
plt.plot(x,z,'g')
plt.show()
At the bottom of my code. This is inconvenient, I would like my to loop endlessly allowing choice between 1,2,3 without having to rerun the program. Why does Python's graphs crash when they are in whiles?
UPDATE
By using plt.draw(), I was able to get the graph to at least show but it still is not responding.
If by not responding you mean it doesn't show the prompt again this is because plt.show() will cause the program to stop until the window is closed. You can replace the plt.show()'s with plt.draw(), but to actually have windows come up you need to be in interactive mode. This is accomplished by calling plt.ion() sometime before any of the draw calls (I put it before the while True:). I've tested it an this should accomplish the behavior you're looking for.
Edit: Since you aren't redrawing the same data, calling draw() will append the data to the specific plot (i.e. typing 1 in over and over will keep adding points). I don't know what type of behavior you're looking for but you may want to call plt.clf before each scatter call if you want to clear the figure.

Categories

Resources