I am trying to reconstruct the approximations and details at all levels using the inverse stationary wavelet transform from the by wavelets package in python. My code is the following:
def UDWT(Btotal, wname, Lps, Hps, edge_eff):
Br = Btotal[0]; Bt = Btotal[1]; Bn = Btotal[2]
## Set parameters needed for UDWT
# If length of data is odd, turn into even numbered sample by getting rid
# of one point
if np.mod(samplelength,2)>0:
Br = Br[0:-1]
Bt = Bt[0:-1]
Bn = Bn[0:-1]
samplelength = len(Br)
# edge extension mode set to periodic extension by default with this
# routine in the rice toolbox.
pads = 2**(np.ceil(np.log2(abs(samplelength))))-samplelength # for edge extension, This function
# returns 2^{ the next power of 2 }for input: samplelength
## Do the UDWT decompositon and reconstruction
keep_all = {}
for m in range(3):
# Gets the data size up to the next power of 2 due to UDWT restrictions
# Although periodic extension is used for the wavelet edge handling we are
# getting the data up to the next power of 2 here by extending the data
# sample with a constant value
if (m==0):
y = np.pad(Br,pad_width = int(pads/2) ,constant_values=np.nan)
elif (m==1):
y = np.pad(Bt,pad_width = int(pads/2) ,constant_values=np.nan)
y = np.pad(Bn,pad_width = int(pads/2) ,constant_values=np.nan)
# Decompose the signal using the UDWT
nlevel = min(pywt.swt_max_level(y.shape[-1]), 8) # Level of decomposition, impose upper limit 10
Coeff = pywt.swt(y, wname, nlevel) # List of approximation and details coefficients
# pairs in order similar to wavedec function:
# [(cAn, cDn), ..., (cA2, cD2), (cA1, cD1)]
# Assign approx: swa and details: swd to
swa = np.zeros((len(y),nlevel))
swd = np.zeros((len(y),nlevel))
for o in range(nlevel):
swa[:,o] = Coeff[o][0]
swd[:,o] = Coeff[o][1]
# Reconstruct all the approximations and details at all levels
mzero = np.zeros(np.shape(swd))
A = mzero
coeffs_inverse = list(zip(swa.T,mzero.T))
invers_res = pywt.iswt(coeffs_inverse, wname)
D = mzero
for pp in range(nlevel):
swcfs = mzero
swcfs[:,pp] = swd[:,pp]
coeffs_inverse2 = list(zip(np.zeros((len(swa),1)).T , swcfs.T))
D[:,pp] = pywt.iswt(coeffs_inverse2, wname)
for jjj in range(nlevel-1,-1,-1):
if (jjj==nlevel-1):
A[:,jjj] = invers_res
# print(jjj)
A[:,jjj] = A[:,jjj+1] + D[:,jjj+1]
# print(jjj)
# *************************************************************************
# *************************************************************************
# Correct for linear phase shift in wavelet coefficients at each level. No
# need to do this for the low-pass filters approximations as they will be
# reconstructed and the shift will automatically be reversed. The formula
# for the shift has been taken from Walden's paper, or has been made up by
# me (can't exactly remember) -- but it is verified and correct.
# *************************************************************************
for j in range(1,nlevel+1):
shiftfac = Hps*(2**(j-1));
for l in range(1,j):
shiftfac = int(shiftfac + Lps*(2**(l-2))*((l-2)>=0)) ;
swd[:,j-1] = np.roll(swd[:,j-1],shiftfac)
flds = {"A": A.T,
"D": D.T,
"swd" : swd.T,
Btot = ['Br', 'Bt', 'Bn'] # Used Just to name files
keep_all[str(Btot[m])] = flds
# 1) Put all the files together into a cell structure
Apr = {}
Swd = {}
pads = int(pads)
names = ['Br', 'Bt', 'Bn']
for kk in range(3):
A = keep_all[names[kk]]['A']
Apr[names[kk]] = A[:,int(pads/2):len(A)-int(pads/2)]
swd = keep_all[names[kk]]['swd']
Swd[names[kk]] = swd[:,int(pads/2):len(A)-int(pads/2)]
# Returns filters list for the current wavelet in the following order
wavelet = pywt.Wavelet(wname)
[h_0,h_1,_,_] = wavelet.inverse_filter_bank
filterlength = len(h_0)
if edge_eff:
# 2) Getting rid of the edge effects; to keep edges skip this section
for j in range(1,nlevel+1):
extra = int((2**(j-2))*filterlength) # give some reasoning for this eq
for m in range(3):
# for approximations
Apr[names[m]][j-1][0:extra] = np.nan
Apr[names[m]][j-1][-extra:-1] = np.nan
# for details
Swd[names[m]][j-1][0:extra] = np.nan
Swd[names[m]][j-1][-extra:-1] = np.nan
return Apr, Swd, pads, nlevel
aa = np.sin(np.linspace(0,2*np.pi,100000))+0.05*np.random.rand(100000)
bb = np.cos(np.linspace(0,2*np.pi,100000))+0.05*np.random.rand(100000)
cc = np.cos(np.linspace(0,4*np.pi,100000))+0.05*np.random.rand(100000)
Btotal = [aa,bb,cc]
wname ='coif2'
Lps = 7; # Low pass filter phase shift for level 1 Coiflet2
Hps = 4; # High pass filter phase shift for level 1 Coiflet2
Apr, Swd, pads, nlevel = UDWT(Btotal, wname, Lps, Hps, edge_eff)
### Add the details at all levels with the highest level approximations
## to compare with the original timeseries. (The equation shown in website)
new = Swd['Br'][0]
for i in range(1,nlevel):
new = Swd['Br'][i]+new
sig = Apr['Br'][-1]+new
### Now plot to comapre ##
## Reconstructed signal 1
### Second way to get reconstructed signal
### aa first level details with approximations
plt.plot(Apr['Br'][-1] +Swd['Br'][-1] )
### Original signal
I am trying to follow the procedure described on this website:
However, the reconstructed time-series does not seem to match the original exactly. As you can see here:
Any help?
According to the original paper by Huang
The marginal Hibert spectrum is given by:
where A = A(w,t) (i.e., a function time and frequency) and p(w,A)
the joint probability density function of P(ω, A) of the frequency [ωi] and amplitude [Ai].
I am trying to estimate 1) The joint probability density using the plt.hist2d 2) the integral shown below using a sum.
The code I am using is the following:
IA_flat1 = np.ravel(IA) ### Turn matrix to 1 D array
IF_flat1 = np.ravel(IF) ### Here IA corresponds to A
IF_flat = IF_flat1[(IF_flat1>min_f) & (IF_flat1<fs)] ### Keep only desired frequencies
IA_flat = IA_flat1[(IF_flat1>min_f) & (IF_flat1<fs)] ### Keep IA that correspond to desired frequencies
### return the Joint probability density
Pjoint,f_edges, A_edges,_ = plt.hist2d(IF_flat,IA_flat,bins=[bins_F,bins_A], density=True)
n1 = np.digitize(IA_flat, A_edges).astype(int) ### Return the indices of the bins to which
n2 = np.digitize(IF_flat, f_edges).astype(int) ### each value in input array belongs.
### define integration function
from numba import jit, prange ### Numba is added for speed
#jit(nopython=True, parallel= True)
def get_int(A_edges, Pjoint ,IA_flat, n1, n2):
dA = np.diff(A_edges)[0] ### Find dx for integration
sum_h = np.zeros(np.shape(Pjoint)[0]) ### Intitalize array
for j in prange(np.shape(Pjoint)[0]):
h = np.zeros(np.shape(Pjoint)[1]) ### Intitalize array
for k in prange(np.shape(Pjoint)[1]):
needed = IA_flat[(n1==k) & (n2==j)] ### Keep only the elements of arrat that
### are related to PJoint[j,k]
h[k] = Pjoint[j,k]*np.nanmean(needed**2)*dA ### Pjoint*A^2*dA
sum_h[j] = np.nansum(h) ### Sum_{i=0}^{N}(Pjoint*A^2*dA)
return sum_h
### Now run previously defined function
sum_h = get_int(A_edges, Pjoint ,IA_flat, n1, n2)
1) I am not sure that everything is correct though. Any suggestions or comments on what I might be doing wrong?
2) Is there a way to do the same using a scipy integration scheme?
You can extract the probability from the 2D histogram and use it for the integration:
# Added some numbers to have something to run
import numpy as np
import matplotlib.pyplot as plt
IA = np.random.rand(100,100)
IF = np.random.rand(100,100)
bins_F = np.linspace(0,1,20)
bins_A = np.linspace(0,1,100)
min_f = 0
fs = 1.0
IA_flat1 = np.ravel(IA) ### Turn matrix to 1 D array
IF_flat1 = np.ravel(IF) ### Here IA corresponds to A
IF_flat = IF_flat1[(IF_flat1>min_f) & (IF_flat1<fs)] ### Keep only desired frequencies
IA_flat = IA_flat1[(IF_flat1>min_f) & (IF_flat1<fs)] ### Keep IA that correspond to desired frequencies
### return the Joint probability density
Pjoint,f_edges, A_edges,_ = plt.hist2d(IF_flat,IA_flat,bins=[bins_F,bins_A], density=True)
f_values = (f_edges[1:]+f_edges[:-1])/2
A_values = (A_edges[1:]+A_edges[:-1])/2
dA = A_values[1]-A_values[0] # for the integral
#Pjoint.shape (19,99)
h = np.zeros(f_values.shape)
for i in range(len(f_values)):
f = f_values[i]
# column of the histogram with frequency f, probability
p = Pjoint[i]
# summatory equivalent to the integral
integral_result = np.sum(p*A_values**2*dA )
h[i] = integral_result
I am lost within the pymcmcstat documentation of Python. I managed to plot the parameter distributions etc, but when it comes to the Bayes factor, I need to calculate the integral over the parameter space of likelihood for each model.
I followed this video. Each model has a different model function with different parameters. According to this link, I am supposed to compare the model evidences for model selection. All I have in my hand is the chain results after burnin that returns the distribution for each parameters, chain for sum-of-squares error (SSE) and variances. How do I compare the models with mcmc chain results I have?
Where do I go from here?
Here is my code for one model; for each model, the test_modelfun is changed and the chain results are saved for further comparison of different models;
# Data related lines: input omega and output fm
x = (np.array([76.29395, 152.5879, 305.1758, 610.3516, 1220.703, 2441.406, 4882.813, 9765.625, 19531.25, 39062.5, 78125, 156250, 312500, 625000]))
y = np.array([155.6412886 -63.3826188j , 113.9114436 -79.90544719j, 64.97809441-77.65152741j, 26.87482243-57.38474656j, 7.44462341-34.02438426j, 2.32954856-16.17918216j, 2.30747953 -6.72487436j, 3.39658859 -2.72444011j, 4.0084345 -1.2029167j , 4.25877486 -0.70276446j, 4.11761329 -0.69591231j, 3.83339489 -0.65244854j, 3.47289164 -0.6079278j , 3.07027319 -0.14914359j])
#import mcmc library and add data to the library in the second line below
mcstat = MCMC()
##define transfer function model calculated with theta parameters
def test_modelfun(xdata, theta):
K, alpha_0, alpha_1, Tp_1, Tp_2, Tz_1 = 10**theta[0], 10**theta[1], 10**theta[2], 10**theta[3], 10**theta[4], 10**theta[5]
Pz_0 = (omega**(alpha_0))
Pz_1 = (np.sqrt(((Tp_1**2)*(omega**(2*alpha_1))) + (2*Tp_1*(omega**alpha_1)*cos(alpha_1*pi/2)) +1))
Pz_2 = (np.sqrt(((Tp_2**2)*(omega**(2*alpha_1))) + (2*Tp_2*(omega**alpha_1)*cos(alpha_1*pi/2)) +1))
Zz_1 = (np.sqrt(((Tz_1**2)*(omega**(2*alpha_1))) + (2*Tz_1*(omega**alpha_1)*cos(alpha_1*pi/2)) +1))
Pp_0 = np.array([(-1*pi*alpha_0)/2]*len(omega)).T#[0]
Pp_1 = np.array([math.atan((Tp_1*(omega[i]**alpha_1)*sin(pi*alpha_1/2))/(1+(Tp_1*(omega[i]**alpha_1)*cos(pi*alpha_1/2)))) for i in range(len(omega))])
Pp_2 = np.array([math.atan((Tp_2*(omega[i]**alpha_1)*sin(pi*alpha_1/2))/(1+(Tp_2*(omega[i]**alpha_1)*cos(pi*alpha_1/2)))) for i in range(len(omega))])
Zp_1 = np.array([math.atan((Tz_1*(omega[i]**alpha_1)*sin(pi*alpha_1/2))/(1+(Tz_1*(omega[i]**alpha_1)*cos(pi*alpha_1/2)))) for i in range(len(omega))])
Z_est = (K*Zz_1)/(Pz_0*Pz_1*Pz_2)
P_est = Zp_1 + Pp_0 - Pp_1 - Pp_2
R_est = np.real([cmath.rect(Z_est[i], P_est[i]) for i in range(len(omega))])#abs()#[:,0]
X_est = np.imag([cmath.rect(Z_est[i], P_est[i]) for i in range(len(omega))])#abs()#[:,0]
RX_est = (R_est + 1j*X_est)
return RX_est
def modelfun(xdata, theta):
ymodel = test_modelfun(xdata,theta)
Zest = 20*log10(np.abs(ymodel))
return Zest
##define sum of squares function for the error in evaluating the likelihood function L(Fobs(i)|q)
def test_ssfun(theta,data):
xdata = data.xdata[0]
ydata = data.ydata[0]
ymodel = test_modelfun(xdata,theta)
return (1/len(omega))*(sum((real(fm)- real(ymodel))**2 + (imag(fm)-imag(ymodel))**2))
#sumsquares = sum((ymodel[:,0]-ydata[:,0])**2)
##import mcmc library and add data to the library in the second line below
itr = 50.0e4
verb = 1
wbar = 1
mcstat = MCMC()
## add model parameters
mcstat.parameters.add_model_parameter(name='th_1',theta0=1, minimum=-2,maximum=3) #m_k, M_k = -2, 3
mcstat.parameters.add_model_parameter(name='th_2',theta0=-1, minimum=-4,maximum=0) #m_a0, M_a0 = -4, 0
mcstat.parameters.add_model_parameter(name='th_3',theta0=-1, minimum=-3,maximum=0) #m_a1, M_a1 = -3, 0
mcstat.parameters.add_model_parameter(name='th_4',theta0=-4, minimum=-9,maximum=0) #m_p1, M_p1 = -9, 0
mcstat.parameters.add_model_parameter(name='th_5',theta0=-4, minimum=-9,maximum=0) #m_p2, M_p2 = -9, 0
mcstat.parameters.add_model_parameter(name='th_6',theta0=-4, minimum=-9,maximum=0) #m_z1, M_z1 = -9, 0
## define simulation options: mh=metropolis-hastings, am=adaptive metropolis, dr=delayed rejection, dram=dr+am
mcstat.simulation_options.define_simulation_options(nsimu=int(itr), updatesigma=1, method='dr', adaptint=100, verbosity=verb, waitbar=wbar)
## define model settings
## extract results
chain = results['chain']# chain for each parameter sampled during simulation. s2
s2chain = results['s2chain']# chain for error variances. if updatesigma=0 then s2chain is an empty list
sschain = results['sschain']# chain for sum-of-squares error calculated using each set of parameter values in the cahin
names = results['names']
burnin = int(itr/2)
## display chain statistics
mcpl = mcstat.mcmcplot
figcp = mcpl.plot_chain_panel(chain, names, figsizeinches = (7,6))
axes = figcp.get_axes()
for ii, ax in enumerate(axes):
ch = chain[:, ii]
ax.plot([burnin, burnin], [ch.min(), ch.max()], 'r')
figpd = mcpl.plot_density_panel(chain[burnin:,:], names, figsizeinches=(7,6))
figpc = mcpl.plot_pairwise_correlation_panel(chain[burnin:,:], names, figsizeinches = (7,6))
mcstat.PI.setup_prediction_interval_calculation(results=results, data=mcstat.data, modelfunction=modelfun, burnin=burnin)
mcstat.PI.generate_prediction_intervals(calc_pred_int=True, waitbar=False)
fg, ax = mcstat.PI.plot_prediction_intervals(adddata=True, plot_pred_int=True, figsizeinches = (7,5), data_display=dict(color='k'))
I have created a code that returns the output that I am after - 2 graphs with multiple lines on each graph. However, the code is slow and quite big (in terms of how many lines of code it takes). I am interested in any improvements I can make that will help me to get such graphs faster, and make my code more presentable.
Additionally, I would like to add more to my graphs (axis names and titles is what I am after). Normally, I would use plt.xlabel,plt.ylabel and plt.title to do so, however I couldn't quite understand how to use them here. The aim here is to add a line to each graph after each loop ( I have adapted this piece of code to do so).
I should note that I need to use Python for this task (so I cannot change to anything else) and I do need Sympy library to find values that are plotted in my graphs.
My code so far is as follows:
import matplotlib.pyplot as plt
import sympy as sym
import numpy as np
x, y = sym.symbols('x, y') # defining our unknown probabilities
al = np.arange(20,1000,5).reshape((196,1)) # values of alpha/beta
prob_of_strA = []
prob_of_strB = []
pen_values = [[0,-5,-10,-25,-50],[0,-25,-50,-125,-250]]
fig1, ax1 = plt.subplots()
fig2, ax2 = plt.subplots()
for j in range(0,len(pen_values[1])):
for i in range(0,len(al)): # choosing the value of beta
A = sym.Matrix([[10, 50], [int(al[i]), pen_values[0][j]]]) # defining matrix A
B = sym.Matrix([[pen_values[1][j], 50], [int(al[i]), 10]]) # defining matrix B
sigma_r = sym.Matrix([[x, 1-x]]) # defining the vector of probabilities
sigma_c = sym.Matrix([y, 1-y]) # defining the vector of probabilities
ts1 = A * sigma_c ; ts2 = sigma_r * B # defining our utilities
y_sol = sym.solvers.solve(ts1[0] - ts1[1],y,dict = True) # solving for y
x_sol = sym.solvers.solve(ts2[0] - ts2[1],x,dict = True) # solving for x
prob_of_strA.append(y_sol[0][y]) # adding the value of y to the vector
prob_of_strB.append(x_sol[0][x]) # adding the value of x to the vector
ax1.plot(al,prob_of_strA,colours[j],label = ["penalty = " + str(pen_values[0][j])]) # plotting value of y for a given penalty value
ax2.plot(al,prob_of_strB,colours[j],label = ["penalty = " + str(pen_values[1][j])]) # plotting value of x for a given penalty value
ax1.legend() # showing the legend
ax2.legend() # showing the legend
prob_of_strA = [] # emptying the vector for the next round
prob_of_strB = [] # emptying the vector for the next round
You can save a couple of lines by initializing your empty vectors inside the loop. You don't have to bother re-defining them at the end.
for j in range(0,len(pen_values[1])):
prob_of_strA = []
prob_of_strB = []
for i in range(0,len(al)): # choosing the value of beta
A = sym.Matrix([[10, 50], [int(al[i]), pen_values[0][j]]]) # defining matrix A
B = sym.Matrix([[pen_values[1][j], 50], [int(al[i]), 10]]) # defining matrix B
sigma_r = sym.Matrix([[x, 1-x]]) # defining the vector of probabilities
sigma_c = sym.Matrix([y, 1-y]) # defining the vector of probabilities
ts1 = A * sigma_c ; ts2 = sigma_r * B # defining our utilities
y_sol = sym.solvers.solve(ts1[0] - ts1[1],y,dict = True) # solving for y
x_sol = sym.solvers.solve(ts2[0] - ts2[1],x,dict = True) # solving for x
prob_of_strA.append(y_sol[0][y]) # adding the value of y to the vector
prob_of_strB.append(x_sol[0][x]) # adding the value of x to the vector
ax1.plot(al,prob_of_strA,colours[j],label = ["penalty = " + str(pen_values[0][j])]) # plotting value of y for a given penalty value
ax2.plot(al,prob_of_strB,colours[j],label = ["penalty = " + str(pen_values[1][j])]) # plotting value of x for a given penalty value
ax1.legend() # showing the legend
ax2.legend() # showing the legend
I simply want to see how long it takes this code to execute. There is a similar question here:
timeit module in python does not recognize numpy module
and I understand what they are saying, but I don't get where these lines of code should be placed. Here is what I have. I know its a little long to scroll through, but you can see where I have placed the timeit commands at the beginning and end. This is not working and I am guessing it is because I have placed these lines of code for timeit incorrectly. The code works if I delete the timeit stuff.
import timeit
u = timeit.Timer("np.arange(1000)", setup = 'import numpy as np')
#set up variables
m = 4.54
g = 9.81
GR = 8
r_pulley = .1
th1=np.pi/4 #based on motor 1 encoder counts. Number of degrees rotated from + x-axis of base frame 0
th2=np.pi/4 #based on motor 2 encoder counts. Number of degrees rotated from + x-axis of m1 frame 1
th3_motor = np.pi/4*12
th3_pulley = th3_motor/GR
#required forces in x,y,z at end effector
fx = 1
fy = 1
fz = m*g #need to figure this out
#Build Homogeneous Tranforms Matrices
H1_0 = np.array(([np.cos(th1),-np.sin(th1),0,0],[np.sin(th1),np.cos(th1),0,0],[0,0,1,l3],[0,0,0,1]))
H2_1 = np.array(([np.cos(th2),-np.sin(th2),0,l1],[np.sin(th2),np.cos(th2),0,0],[0,0,1,0],[0,0,0,1]))
H3_2 = np.array(([1,0,0,l2],[0,1,0,0],[0,0,1,0],[0,0,0,1]))
H2_0 = np.dot(H1_0,H2_1)
H3_0 = np.dot(H2_0,H3_2)
#These HTMs are using the way I derived them, not the "correct" way.
#The answers are the same, but I think the processing time will be the same.
#This is because either way the two matrices with all the sines and cosines...
#will be the same. Only difference is in one method the ones and zeroes...
#matrix is the first HTM, in the other method it is the last HTM. So its the...
#same number of matrices with the same information, just being dot-producted...
#in a different order.
#Build Jacobian
#np.cross(x, y)
d10 = H1_0[0:3, 3]
d20 = H2_0[0:3, 3]
d30 = H3_0[0:3, 3]
subt1 = d30-d10
subt2 = d30-d20
#tsubt1 = subt1.transpose()
#tsubt2 = subt2.transpose()
zeroes = np.array(([0,0,1]))
cross1 = np.cross(zeroes, subt1)
cross2 = np.cross(zeroes, subt2)
#These cross products are correct but need to be tranposed into columns, right now they are a single row.
#dont actually need these transposes but I didnt want to forget the command.
# build jacobian (J)
#J = np.zeros((6,2))
#J[0:3,0] = cross1
#J[0:3,1] = cross2
#J[3:6,0] = zeroes
#J[3:6,1] = zeroes
#find torques
J_force = np.zeros((2,3))
#build force matrix
forces = np.array(([fx],[fy],[fz]))
torques = np.dot(J_force,forces)
torques #top number is theta 1 (M1) and bottom number is theta 2 (M2)
#need to add z axis?
# u is a timer eval np.arange(1000)
u = timeit.Timer("np.arange(1000)", setup = 'import numpy as np')
# print how many seconds needed to run np.arange(1000) 1000000 times
# 1000000 is the default value, you can set by passing a int here.
So the following is what you want.
import timeit
def main():
#set up variables
m = 4.54
g = 9.81
GR = 8
r_pulley = .1
th1=np.pi/4 #based on motor 1 encoder counts. Number of degrees rotated from + x-axis of base frame 0
th2=np.pi/4 #based on motor 2 encoder counts. Number of degrees rotated from + x-axis of m1 frame 1
th3_motor = np.pi/4*12
th3_pulley = th3_motor/GR
#required forces in x,y,z at end effector
fx = 1
fy = 1
fz = m*g #need to figure this out
#Build Homogeneous Tranforms Matrices
H1_0 = np.array(([np.cos(th1),-np.sin(th1),0,0],[np.sin(th1),np.cos(th1),0,0],[0,0,1,l3],[0,0,0,1]))
H2_1 = np.array(([np.cos(th2),-np.sin(th2),0,l1],[np.sin(th2),np.cos(th2),0,0],[0,0,1,0],[0,0,0,1]))
H3_2 = np.array(([1,0,0,l2],[0,1,0,0],[0,0,1,0],[0,0,0,1]))
H2_0 = np.dot(H1_0,H2_1)
H3_0 = np.dot(H2_0,H3_2)
#These HTMs are using the way I derived them, not the "correct" way.
#The answers are the same, but I think the processing time will be the same.
#This is because either way the two matrices with all the sines and cosines...
#will be the same. Only difference is in one method the ones and zeroes...
#matrix is the first HTM, in the other method it is the last HTM. So its the...
#same number of matrices with the same information, just being dot-producted...
#in a different order.
#Build Jacobian
#np.cross(x, y)
d10 = H1_0[0:3, 3]
d20 = H2_0[0:3, 3]
d30 = H3_0[0:3, 3]
subt1 = d30-d10
subt2 = d30-d20
#tsubt1 = subt1.transpose()
#tsubt2 = subt2.transpose()
zeroes = np.array(([0,0,1]))
cross1 = np.cross(zeroes, subt1)
cross2 = np.cross(zeroes, subt2)
#These cross products are correct but need to be tranposed into columns, right now they are a single row.
#dont actually need these transposes but I didnt want to forget the command.
# build jacobian (J)
#J = np.zeros((6,2))
#J[0:3,0] = cross1
#J[0:3,1] = cross2
#J[3:6,0] = zeroes
#J[3:6,1] = zeroes
#find torques
J_force = np.zeros((2,3))
#build force matrix
forces = np.array(([fx],[fy],[fz]))
torques = np.dot(J_force,forces)
torques #top number is theta 1 (M1) and bottom number is theta 2 (M2)
#need to add z axis?
u = timeit.Timer(main)
I wrote a code a while ago that processes spectra using data from text files and performing calculations on them. I started with a code that just does everything line-by-line without any functions, and despite being long, it finishes running in 2.11 seconds (according to %%timeit). Below is that original code, labeled as such.
However, I wanted to put my code into functions instead, to allow for easier readability and usage with different models in the future. Even though I'm using all the same steps as I did before (but this time inside my functions), it is so much slower. This code is also below. Now, I have to wait for about 15-20 minutes to get the same outputs. Why is it so much slower, and is there any way I can make it significantly faster but still use functions?
Original Code:
import re
import matplotlib.pyplot as plt
import numpy as np
import scipy.interpolate
filename = 'bpass_spectra.txt'
extinctionfile = 'ExtinctionLawPoints.txt' # from R_V = 4.0
pointslist = []
datalist = []
speclist = []
# Constants
Msun = 1.98892e30 # solar mass [kg]
h = 4.1357e-15 # Planck's constant [eV s]
c = float(3e8) # speed of light [m/s]
# Read spectra file
f = open(filename, 'r')
rawspectra = f.readlines()
met = re.findall('Z\s=\s(\d*\.\d+)', rawspectra[0])
del rawspectra[0]
for i in range(len(rawspectra)):
newlist = rawspectra[i].split(' ')
# Read extinction curve data file
rawpoints = open(extinctionfile, 'r').readlines()
for i in range(len(rawpoints)):
newlst = re.split('(?!\S)\s(?=\S)|(?!\S)\s+(?=\S)', rawpoints[i])
pointslist = pointslist[3:]
lambdalist = [float(item[0]) for item in pointslist]
k_abslist = [float(item[4]) for item in pointslist]
xvallist = [(c*h)/(lamb*1e-6) for lamb in lambdalist]
k_interp = scipy.interpolate.interp1d(xvallist, k_abslist)
# Create new lists
Elist = [float(item[0]) for item in datalist]
speclambdalist = [h*c*1e9/E for E in Elist]
z1list = [float(item[1]) for item in datalist]
met = met[0]
klist = [None]*len(speclist)
Loutlist = [None]*len(speclist)
Tlist = [None]*len(speclist)
# Define parameters
b = 2.0
R = 1.0
z = 1.0
Mgas = 1.0 # mass of gas, input
Mhalo = 2e41 # mass of dark matter halo, known
if float(met) > 0.0052:
DGRlist = [50.0*np.exp(-2.21)*float(met)]*len(speclist)
elif float(met) <= 0.0052:
DGRlist = [((50.0*float(met))**3.15)*np.exp(-0.96)]*len(speclist)
for i in range(len(speclist)):
if float(Elist[i]) <= 4.1357e-3: # frequencies <= 10^12 Hz
klist[i] = 0.1*(float(Elist[i])/(1000.0*h))**b # extinction law [cm^2/g]
elif float(Elist[i]) > 4.1357e-3: # frequencies > 10^12 Hz
klist[i] = k_interp(Elist[i]) # interpolated function's value at Elist[i]
Mdustlist = [Mgas*DGR for DGR in DGRlist] # dust mass
Rhalo = 0.784*(0.27**2.0)*(0.7**(-2.0/3.0))*float(10.0/(1.0+z))*((Mhalo/(1e8*Msun))**(1.0/3.0))
Rdust = 0.018*Rhalo # [kpc]
for i in range(len(speclist)):
Tlist[i] = 3*Mdustlist[i]*klist[i]/(4*np.pi*Rdust)
Linlist = [float(spectra)*R for spectra in speclist]
# Outgoing luminosity as function of wavelength
for i in range(len(Linlist)):
Loutlist[i] = Linlist[i]*np.exp(-Tlist[i])
# Test the calculation
print "LIN ELEMENTS 0 AND 1000:", Linlist[0], Linlist[1000]
print "LOUT ELEMENTS 0 AND 1000:", Loutlist[0], Loutlist[1000]
New "function-ized" Code (much slower):
import re
import matplotlib.pyplot as plt
import numpy as np
import scipy.interpolate
# Required files and lists
filename = 'bpass_spectra.txt' # number of columns = 4
extinctionfile = 'ExtinctionLawPoints.txt' # R_V = 4.0
datalist = []
if filename == 'bpass_spectra.txt':
filetype = 4
filetype = 1
if extinctionfile == 'ExtinctionLawPoints.txt':
R_V = 4.0
R_V = 1.0 #to be determined
# Constants
M_sun = 1.98892e30 # solar mass [kg]
h = 4.1357e-15 # Planck's constant [eV s]
c = float(3e8) # speed of light [m/s]
# Inputs
beta = 2.0
R = 1.0
z = 1.0
M_gas = 1.0
M_halo = 2e41
# Read spectra file
f = open(filename, 'r')
rawlines = f.readlines()
met = re.findall('Z\s=\s(\d*\.\d+)', rawlines[0])
del rawlines[0]
for i in range(len(rawlines)):
newlist = rawlines[i].split(' ')
# Read extinction curve data file
rawpoints = open(extinctionfile, 'r').readlines()
def interpolate(R_V, rawpoints, Elist, j):
pointslist = []
if R_V == 4.0:
for i in range(len(rawpoints)):
newlst = re.split('(?!\S)\s(?=\S)|(?!\S)\s+(?=\S)', rawpoints[i])
pointslist = pointslist[3:]
lambdalist = [float(item[0]) for item in pointslist]
k_abslist = [float(item[4]) for item in pointslist]
xvallist = [(c*h)/(lamb*1e-6) for lamb in lambdalist]
k_interp = scipy.interpolate.interp1d(xvallist, k_abslist)
return k_interp(Elist[j])
# Dust extinction function
def dust(interpolate, filetype, datalist, beta, R, z, M_gas, M_halo, met):
speclist = []
if filetype == 4:
metallicity = float(met[0])
Elist = [float(item[0]) for item in datalist]
speclambdalist = [h*c*1e9/E for E in Elist]
met1list = [float(item[1]) for item in datalist]
klist, Tlist = [None]*len(speclist), [None]*len(speclist)
if metallicity > 0.0052:
DGRlist = [50.0*np.exp(-2.21)*metallicity]*len(speclist) # dust to gas ratio
elif metallicity <= 0.0052:
DGRlist = [((50.0*metallicity)**3.15)*np.exp(-0.96)]*len(speclist)
for i in range(len(speclist)):
if Elist[i] <= 4.1357e-3: # frequencies <= 10^12 Hz
klist[i] = 0.1*(float(Elist[i])/(1000.0*h))**beta # extinction law [cm^2/g]
elif Elist[i] > 4.1357e-3: # frequencies > 10^12 Hz
klist[i] = interpolate(R_V, rawpoints, Elist, i) # interpolated function's value at Elist[i]
Mdustlist = [M_gas*DGR for DGR in DGRlist] # dust mass
R_halo = 0.784*(0.27**2.0)*(0.7**(-2.0/3.0))*float(10/(1+z))*((M_halo/(1e8*M_sun))**(1.0/3.0))
R_dust = 0.018*R_halo # [kpc]
# Optical depth calculation
Tlist = [3*Mdustlist[i]*klist[i]/(4*np.pi*R_dust) for i in range(len(speclist))]
# Ingoing and outgoing luminosities as functions of wavelength
Linlist = [float(spectra)*R for spectra in speclist]
Loutlist = [Linlist[i]*np.exp(-Tlist[i]) for i in range(len(speclist))]
return speclambdalist, Linlist, Loutlist
print dust(interpolate, filetype, datalist, beta, R, z, M_gas, M_halo, met)
Even when I only have the function return Loutlist instead of the tuple of 3 lists, it's still extremely slow. Any ideas on why this is? Also, I'm going to want to return the tuple and then plot speclambdalist versus Linlist, and also plot speclambdalist versus Loutlist on the same plot. But I'm under the impression that each time I call dust(interpolate, filetype, datalist, beta, R, z, M_gas, M_halo, met)[i] where i = 0, 1, or 2 (I'll be doing this multiple times), it'll have to run the function again each time. Is there any way to bypass these extra runs to further increase speed? Thank you!