I have a model made up of a sum of several lmfit StepModel(form='erf') steps, which gives a pretty good fit to my data. However, the residual shows some skew at each step. I can use scipy.stats.skew(out.result.residual) to get the skew over the entire dataset, but what I would really like is a value of skew for each component of the model, that is, a skew1 associated with step1, a skew2 with step2, etc.
I've changed the code in examples/doc_builtinmodels_stepmodel.py to give an example of what I mean:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import skew
from lmfit.models import LinearModel, StepModel
x = np.linspace(0, 10, 201)
a = np.ones_like(x)
b = np.ones_like(x)
a[:48] = 0.0
a[48:77] = np.arange(77-48)/(77.0-48)
b[:10] = 0.0
b[10:39] = np.arange(39-10)/(39.0-10)
np.random.seed(0)
a = 110.2 * (a + 9e-3*np.random.randn(x.size)) + 12.0 + 2.22*x
b = 110.2 * (b + 9e-3*np.random.randn(x.size)) + 12.0 + 2.22*x
y = a + b
step_mod = StepModel(form='erf', prefix='s1_')
step2_mod = StepModel(form='erf', prefix='s2_')
line_mod = LinearModel(prefix='line_')
pars = line_mod.make_params(intercept=y.min(), slope=0)
pars += step_mod.guess(y, x=x, center=2.5)
pars += step2_mod.guess(y, x=x, center=2.5)
mod = step_mod + step2_mod + line_mod
out = mod.fit(y, pars, x=x)
out.plot(data_kws={'markersize': 1})
print(skew(out.result.residual))
I suspect I may have to build the skew directly into the StepModel() function, but I admit I don't really know how to do that.
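One possibility I am considering, short of modifying StepModel itself, is to compute the skew of the residual in a window around each fitted step. A minimal sketch of that idea, using the fitted center and sigma parameters from the result above (the 3-sigma window width is an arbitrary choice on my part):
resid = out.result.residual
for prefix in ('s1_', 's2_'):
    center = out.params[prefix + 'center'].value
    sigma = out.params[prefix + 'sigma'].value
    mask = np.abs(x - center) < 3 * sigma   # points within three widths of this step
    print(prefix, skew(resid[mask]))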
I am trying to use curve fitting to find coefficients for an equation using multiple datasets. The equation itself is piecewise; as implemented in the fitting functions below, it is defined as:
y = a*x + b   for x < Po
y = c*x + d   for x >= Po
In this equation we don't know the break point Po (x0 in the code); the variables a, b, c, and d are the coefficients to be fitted.
I have tried using scipy curve_fit and lmfit. curve_fit successfully fitted the data for some datasets but failed miserably in others. Here is the code for lmfit, inspired by this answer, and for curve_fit, inspired by this answer:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.special import erf, erfc
from lmfit import minimize, Parameters
power_level_for_prediction = [45, 50, 60, 69, 71, 88]
group_by_column = "mem_pow"
critical_device_power_name = "core_pow"
experiment_name = "piecewise fit"  # placeholder; the original snippet never defined this
files = pd.read_csv("file_path")
def residual(params, x, y=None):
    param1 = params['a']
    param2 = params['b']
    param3 = params['x0']
    param4 = params['c']
    param5 = params['d']
    dx = (max(x) - min(x)) / (len(x) - 1)
    xhi = (erf((x - param3)/dx) + 1)/2.0
    xlo = (erfc((x - param3)/dx) + 1)/2.0
    # p = xlo*param4*np.exp(param5*x) + xhi*(param1*x + param2)
    p = xlo*(param1*x + param2) + xhi*(param4*x + param5)
    # p = param1*x + param2
    # p[np.where(param2 < x)] = param3*x + param2
    if y is None:
        return p
    return p - y
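# Aside (my own standalone check, not part of the fit): the erf/erfc pair above
# is meant to act as a smooth switch around the break point x0, blending the two
# linear branches over a width of roughly dx. Illustrative names below; note that
# (erf(z)+1)/2 and erfc(z)/2 are the pair that sums exactly to one:
zz = np.linspace(-5, 5, 11)
switch_hi = (erf(zz) + 1) / 2.0   # ~0 well left of the break, ~1 well right
switch_lo = erfc(zz) / 2.0        # exact complement, so switch_lo + switch_hi == 1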
def linear_lmfit(x, y):
    params = Parameters()
    params.add('a', value=0.1)
    params.add('b', value=0.2)
    params.add('c', value=0.3)
    params.add('d', value=0.4, min=-5, max=5)
    params.add('x0', value=120)
    out = minimize(residual, params, args=(x, y))
    fit = residual(out.params, x)
    return fit
def piecewise_linear(x, x0, y0, a, c):
    # Representation of the above equation; b and d are expressed through y0 (the
    # value at the break point) as b = y0 - a*x0 and d = y0 - c*x0, which keeps
    # the two pieces continuous at x0.
    return np.piecewise(x, [x < x0], [lambda x: a*x + y0 - a*x0, lambda x: c*x + y0 - c*x0])
def linear(files):
    files_grouped = files.groupby(group_by_column)
    rows, columns = (2, 3)
    fig, ax = plt.subplots(rows, columns, figsize=(20, 10))
    k = 0
    for name, group in files_grouped:
        x = group[critical_device_power_name].to_numpy().astype(float)
        y = group['elapsed_time'].to_numpy().astype(float)
        if name in power_level_for_prediction:
            i = math.floor(k / columns)
            j = k % columns
            p, e = curve_fit(piecewise_linear, x, y)
            # pred = piecewise_linear(x, *p)
            pred = linear_lmfit(x, y)
            ax[i][j].plot(x, y, label="Actual Elapsed Time")
            ax[i][j].plot(x, pred, label="Predicted Elapsed Time")
            ax[i][j].grid()
            ax[i][j].set_title(f"Prediction Result for {name}W {group_by_column}")
            ax[i][j].set_ylabel(r"$T_c$ (sec)")
            ax[i][j].set_xlabel(f"{critical_device_power_name}")
            ax[i][j].legend(title=f'{group_by_column}')
            k = k + 1
    fig.suptitle(f"{experiment_name}")
    fig.tight_layout()
    plt.show()
Result using LMFIT:
I have no clue why LMFIT is showing this type of result. Do you think it is because of the initial values?
And here is the result for curve_fit:
As seen in the graphs, for some mem_pow values the fit is somewhat good, but for others it is quite bad. I am unable to understand the reason behind this. In my opinion, the curve fitting is failing for those mem_pow levels because the second piecewise segment is quite flat and the function fails to fit that part.
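One thing that often helps curve_fit in a case like this, offered only as an untested sketch, is an explicit initial guess, so the break point does not start at curve_fit's default of all ones (assuming x and y extracted for one group as in linear() above):
slope0 = (y[-1] - y[0]) / (x[-1] - x[0])            # crude global slope as a seed
p0 = [np.median(x), np.median(y), slope0, slope0]   # x0, y0, a, c
p, e = curve_fit(piecewise_linear, x, y, p0=p0)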
Here is the csv file:
https://gist.github.com/kulnaman/8952e9c14ec5e8dcf2bbbd40f2dccdaa
I'm trying to use logistic regression on the popularity of hit songs on Spotify from 2010-2019, based on their durations and durability, whose data are collected from a .csv file. Since the popularity values of each song are numerical, I have converted each of them to binary: if the popularity value of a song is 70 or less, I replace it with 0, and with 1 if it is more than 70. The rest of my code is pretty standard for creating a sigmoid function, but for some reason the end result is a straight line instead of a sigmoid curve.
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
df = pd.read_csv('top10s [SubtitleTools.com] (2).csv')
BPM = df.bpm
BPM = np.array(BPM)
Energy = df.nrgy
Energy = np.array(Energy)
Dance = df.dnce
Dance = np.array(Dance)
dB = df.dB
dB = np.array(dB)
Live = df.live
Live = np.array(Live)
Valence = df.val
Valence = np.array(Valence)
Acous = df.acous
Acous = np.array(Acous)
Speech = df.spch
Speech = np.array(Speech)
df.loc[df['popu'] <= 70, 'popu'] = 0
df.loc[df['popu'] > 70, 'popu'] = 1
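# Aside: the two .loc assignments above could equivalently be written in one
# step (left commented out, since popu has already been binarized at this point):
# df['popu'] = (df['popu'] > 70).astype(int)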
def Logistic_Regression(X, y, iterations, alpha):
    ones = np.ones((X.shape[0], ))
    X = np.vstack((ones, X))
    X = X.T
    b = np.zeros(X.shape[1])
    for i in range(iterations):
        z = np.dot(X, b)
        p_hat = sigmoid(z)
        gradient = np.dot(X.T, (y - p_hat))
        b = b + alpha * gradient
        if (i % 1000 == 0):
            print('LL, i ', log_likelihood(X, y, b), i)
    return b
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
def log_likelihood(X, y, b):
    z = np.dot(X, b)
    LL = np.sum(y*z - np.log(1 + np.exp(z)))
    return LL
def LR1():
    Dur = df.dur
    Dur = np.array(Dur)
    Pop = df.popu
    Pop = np.array([int(i) for i in Pop])
    plt.figure(figsize=(10, 8))
    colormap = np.array(['r', 'b'])
    plt.scatter(Dur, Pop, c=colormap[Pop], alpha=.4)
    b = Logistic_Regression(Dur, Pop, iterations=8000, alpha=0.00005)
    print('Done')
    p_hat = sigmoid(np.dot(Dur, b[1]) + b[0])
    idxDur = np.argsort(Dur)
    plt.plot(Dur[idxDur], p_hat[idxDur])
    plt.show()
LR1()
df
Your logreg params aren't coming out correctly, so something is wrong in your gradient descent.
If I do
import pandas as pd
from sklearn.linear_model import LogisticRegression
df = pd.DataFrame({'popu': [0, 1, 0, 1, 1, 0, 0, 1, 0, 0],
                   'dur': [217, 283, 200, 295, 221, 176, 206, 260, 217, 213]})
Dur = df['dur'].to_numpy()
Pop = df['popu'].to_numpy()
logreg = LogisticRegression()
logreg.fit(Dur.reshape([10, 1]), Pop)
print(logreg.coef_)
print(logreg.intercept_)
I get a coefficient of 0.86473507 and an intercept of -189.79655798,
whereas your params (b) come out as [0.012136874150412973, -0.2430389407767768] for this data.
Plot of your vs scikit logregs here
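A likely contributor, though this is an assumption rather than something verified against the full dataset: with raw durations in the hundreds, a single fixed step size updates the intercept far too slowly relative to the slope. Standardizing the feature before running the same routine usually lets it converge; a minimal sketch:
Dur_std = (Dur - Dur.mean()) / Dur.std()   # rescale to zero mean, unit variance
b = Logistic_Regression(Dur_std, Pop, iterations=8000, alpha=0.01)  # alpha chosen for the rescaled feature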
The goal is to plot two identical dynamical systems that are coupled.
We have:
X = [x0,x1,x2]
U = [u0,u1,u2]
And
Xdot = f(X) + alpha*(U-X)
Udot = f(U) + alpha*(X-U)
So I wish to plot the solution to this grand system on one set of axes (i.e. in xyz, for example) and eventually change the coupling strength to investigate the behaviour.
import matplotlib.pyplot as plt
import numpy as np
from scipy.integrate import odeint
from mpl_toolkits.mplot3d import Axes3D
def couple(s, t, a=0.2, beta=0.2, gamma=5.7, alpha=0.03):
    [x, u] = s
    [u0, u1, u2] = u
    [x0, x1, x2] = x
    xdot = np.zeros(3)
    xdot[0] = -x1 - x2
    xdot[1] = x0 + a*x1
    xdot[2] = beta + x2*(x0 - gamma)
    udot = np.zeros(3)
    udot[0] = -u1 - u2
    udot[1] = u0 + a*u1
    udot[2] = beta + u2*(u0 - gamma)
    sdot = np.zeros(2)
    sdot[0] = xdot + alpha*(u - x)
    sdot[1] = udot + alpha*(x - u)
    return sdot
s_init = [0.1,0.1]
t_init=0; t_final = 300; t_step = 0.01
tpoints = np.arange(t_init,t_final,t_step)
a=0.2; beta=0.2; gamma=5.7; alpha=0.03
y = odeint(couple, s_init, tpoints,args=(a,beta,gamma,alpha), hmax = 0.01)
I imagine that something is wrong with s_init since it should be TWO initial condition vectors but when I try that I get that "odeint: y0 should be one-dimensional." On the other hand when I try s_init to be a 6-vector I get "too many values to unpack (expected two)." With the current setup, I am getting the error
File "C:/Users/Python Scripts/dynsys2019work.py", line 88, in couple
[u0,u1,u2] = u
TypeError: cannot unpack non-iterable numpy.float64 object
Cheers
*Edit: Please note this is basically my first time attempting this kind of thing, and I would be happy to receive further documentation and references.
The ODE function for scipy's odeint takes in and returns a 1D vector, and I think some of your confusion is that you actually have one system of ODEs with six variables. You have just mentally apportioned it into two separate ODEs that are coupled.
You can do it like this:
import matplotlib.pyplot as plt
from scipy.integrate import odeint
import numpy as np
def couple(s, t, a=0.2, beta=0.2, gamma=5.7, alpha=0.03):
    x0, x1, x2, u0, u1, u2 = s
    x = np.array([x0, x1, x2])
    u = np.array([u0, u1, u2])
    xdot = np.zeros(3)
    xdot[0] = -x1 - x2
    xdot[1] = x0 + a*x1
    xdot[2] = beta + x2*(x0 - gamma)
    udot = np.zeros(3)
    udot[0] = -u1 - u2
    udot[1] = u0 + a*u1
    udot[2] = beta + u2*(u0 - gamma)
    # apply the coupling from the question: Xdot = f(X) + alpha*(U - X), Udot = f(U) + alpha*(X - U)
    xdot += alpha*(u - x)
    udot += alpha*(x - u)
    return np.ravel([xdot, udot])
s_init = [0.1,0.1, 0.1, 0.1, 0.1, 0.1]
t_init=0; t_final = 300; t_step = 0.01
tpoints = np.arange(t_init,t_final,t_step)
a=0.2; beta=0.2; gamma=5.7; alpha=0.03
y = odeint(couple, s_init, tpoints,args=(a,beta,gamma,alpha), hmax = 0.01)
plt.plot(tpoints,y[:,0])
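Since the stated goal was to see both copies in xyz on one set of axes, here is a minimal plotting sketch on top of the solution above (the low linewidth is just a readability choice):
fig = plt.figure()
ax = fig.add_subplot(projection='3d')            # one 3D axes for both trajectories
ax.plot(y[:, 0], y[:, 1], y[:, 2], lw=0.5, label='X')
ax.plot(y[:, 3], y[:, 4], y[:, 5], lw=0.5, label='U')
ax.legend()
plt.show()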
I am very new to Python, and I am trying to minimize a function, but the result seems inaccurate, or at least its difference from the Matlab result is too big.
My two questions are:
(1) Am I right to assume the difference in the results comes from an inaccurate Python solution?
(I believe so because, for example, change(c1)/change(y1) is constant in Matlab, as I believe it should be, while it changes quite a bit in Python.)
(2) What can I do to improve the accuracy of the Python result?
I have already tried other methods (TNC, L-BFGS-B), providing an analytical gradient or a more accurate numerical gradient, and other routines (minimize_scalar with method='Bounded'), but they all give pretty much the same result.
Here is my code:
import numpy as np
from scipy.optimize import minimize
from scipy.optimize import Bounds
# need to define nu
def ut_fun(CC):
    if nu != 1:
        UU = (CC ** (1 - nu) - 1) / (1 - nu)
    else:
        UU = np.log(CC)
    return UU
# need to define RR, A1, y1, y2_L, y2_H, gamma, beta, bb
def obj_2per(c1):
    A2 = RR*A1 + y1 - c1
    c2_L = RR*A2 + y2_L
    U_L = ut_fun(c2_L)
    c2_H = RR*A2 + y2_H
    U_H = ut_fun(c2_H)
    EV2 = gamma*U_L + (1 - gamma)*U_H
    mVV = -(ut_fun(c1) + beta*EV2)
    return mVV
nu = 2
A1 = 0
bb = 0
beta = 0.96
RR = 1/beta
y2_L = 0.1
y2_H = 0.2
gamma = 0.5
#Pre-allocation in np arrays
y1_vec = np.linspace(0.04,0.4,10)
c1_star = np.zeros(len(y1_vec))
#Actual optimization:
c1_0 = 0.01
for ii in range(len(y1_vec)):
    y1 = y1_vec[ii]
    ub = RR*A1 + y1 - bb
    bnds = [(-np.inf, ub)]
    sol = minimize(obj_2per, c1_0, method='trust-constr', bounds=bnds)
    c1_star[ii] = float(sol.x)
    c1_0 = c1_star[ii]
print(c1_star)
The Python result is:
[0.03999284 0.07995512 0.11997128
0.14458588 0.16599669 0.18724888
0.20837178 0.22939139 0.25032751
0.27119543]
The Matlab result is:
0.0399997050892807 0.0799994508682207 0.119999719341015 0.153878407968280 0.174286891630529 0.194695468467231 0.215103764323911 0.235511996564921 0.255920191410148 0.276328383256344
The difference in results from the fourth entry onwards is too large.
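One knob I can still try, noted here as an untested sketch rather than a confirmed fix: trust-constr stops on the xtol and gtol tolerances, which both default to 1e-8, so tightening them might matter in case the objective is flat near the optimum:
sol = minimize(obj_2per, c1_0, method='trust-constr', bounds=bnds,
               options={'xtol': 1e-12, 'gtol': 1e-12})  # defaults are 1e-8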
I am trying to plot the phase portrait for the equation defined in my sh2 function in the code below. I know the expected phase plot should look like this: https://i.stack.imgur.com/y1T9Y.png
However, this is my result: https://i.stack.imgur.com/EuSOm.png
I am using integrate.odeint; can anyone suggest what I could change in the code below, or whether it would be best to use another algorithm that would give me a result closer to the expected one?
Please find my code:
import matplotlib.pyplot as plt
import numpy as np
from numpy import sin
import scipy.integrate as integrate
from math import *
g = 9.81
l = 1.6
l_big = 2.0
l_small = 1.6
m = 0.01
alpha = l_big-l_small
k = 10*(10**40)
def sh2(r1, t):
    theta1, omega1 = r1
    sh2_theta1 = omega1
    sh2_omega1 = -g*(l + ((1/2)*alpha*(1 - np.tanh(theta1*omega1*k))))*sin(theta1)
    return np.array([sh2_theta1, sh2_omega1], float)
init_state = np.radians([30.0,0])
dt = 1/10.0
time = np.arange(0,10.0,dt)
timexo = np.arange(0,10.0,dt)
state2 = integrate.odeint(sh2,init_state,time)
print(len(state2),len(timexo))
state2_plot = np.transpose(state2[0:2500])
plt.plot(timexo[0:2500],state2_plot[1], '--m', label = r'$\theta = \frac{\pi}{6}$')
plt.xlabel('Time t (s) ')
plt.ylabel('Angular Velocity' ' ' r'$\dot{\theta}$')
plt.show()
#code for phase plot
# initial values
x_0 = 0.0 # intial angular position
v_0 = 1.0 # initial angular momentum
t_0 = 0 # initial time
# initial y-vector from initial position and momentum
y0 = np.array([x_0,v_0])
# max value of time and points in time to integrate to
t_max = 10
N_spacing_in_t = 10000
# create vector of time points you want to evaluate
t = np.linspace(t_0,t_max,N_spacing_in_t)
# create vector of positions for those times
y_result = integrate.odeint(sh2, init_state, t)
# get angle and angular momentum
angle = y_result[:,0]
angular_velocity = y_result[:,1]
# plot result
fig = plt.figure()
plt.plot(angle, angular_velocity,'--k',lw=1)
plt.xlabel('Angle' ' ' r'$\theta$')
plt.ylabel(r'Angular Velocity' r' $\dot{\theta}$')
plt.gcf().savefig('pumping.png',dpi=300)
plt.show()
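One alternative I am considering, included only as an untested sketch on my side: scipy's solve_ivp with explicitly tightened tolerances (note its callback signature f(t, y) is the reverse of odeint's f(y, t), hence the lambda, and I reuse the same initial state as the odeint call above):
from scipy.integrate import solve_ivp
sol = solve_ivp(lambda t_, r: sh2(r, t_), (t_0, t_max), init_state,
                t_eval=t, rtol=1e-10, atol=1e-12)
angle, angular_velocity = sol.y   # sol.y has shape (2, len(t))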
Thank you for your time.