How to Fit to The Outer Shell of a Function - python

I am trying to make a Gaussian fit on a function that is messy. I want to fit only the exterior outer shell (these are not simply the maximum values at each x, because some of those maxima will themselves be too low, since the sample size is small).
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

def Gauss(x, a, x0, sigma, offset):
    return a * np.exp(-np.power(x - x0, 2) / (2 * np.power(sigma, 2))) + offset

def fitNormal(x, y):
    popt, pcov = curve_fit(Gauss, x, y, p0=[np.max(y), np.median(x), np.std(x), np.min(y)])
    return popt

# xPlot, yPlot hold the messy (x, y) data (defined elsewhere)
plt.plot(xPlot, yPlot, 'k.')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Y(x)')

x, y = xPlot, yPlot
popt = fitNormal(x, y)

minx, maxx = np.min(x), np.max(x)
xFit = np.arange(start=minx, stop=maxx, step=(maxx - minx) / 1000)

yFitTest = Gauss(xPlot, popt[0], popt[1], popt[2], popt[3])
print('max fit test: ', np.max(yFitTest))
print('max y: ', np.max(yPlot))

# rescale the fit so its peak reaches the highest data point
maxIndex = np.where(yPlot == np.max(yPlot))[0][0]
factor = yPlot[maxIndex] / yFitTest[maxIndex]
yFit = Gauss(xFit, popt[0], popt[1], popt[2], popt[3]) * factor
plt.plot(xFit, yFit, 'r')

This is an iterative approach similar to this post. It is different in the sense that the shape of the graph does not permit the use of a convex hull. So the idea is to create a cost function that tries to minimize the area under the graph while paying a high cost if a point lies above the graph. Depending on the type of graph in the OP, the cost function needs to be adapted. One also has to check whether in the final result all points are really below the graph. Here one can fiddle with details of the cost function. One may, e.g., include an offset in the tanh like tanh( slope * ( x - offset ) ) to push the solution farther away from the data.
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import leastsq

def g( x, a, s ):
    return a * np.exp( -x**2 / s**2 )

def cost_function( params, xData, yData, slope, val ):
    a, s = params
    area = 0.5 * np.sqrt( np.pi ) * a * s
    diff = np.fromiter( ( y - g( x, a, s ) for x, y in zip( xData, yData ) ), float )
    cDiff = np.fromiter( ( val * ( 1 + np.tanh( slope * d ) ) for d in diff ), float )
    out = np.concatenate( [ [ area ], cDiff ] )
    return out

xData = np.linspace( -5, 5, 500 )
yData = np.fromiter( ( g( x, .77, 2 ) * np.sin( 257.7 * x )**2 for x in xData ), float )

# iteratively sharpen the tanh penalty, starting from the previous solution
sol = [ [ 1, 2.2 ] ]
for i in range( 1, 6 ):
    solN, err = leastsq( cost_function, sol[-1], args=( xData, yData, 10**i, 1 ) )
    sol += [ solN ]
    print( solN )

fig = plt.figure()
ax = fig.add_subplot( 1, 1, 1 )
ax.scatter( xData, yData, s=1 )
for solN in sol:
    solY = np.fromiter( ( g( x, *solN ) for x in xData ), float )
    ax.plot( xData, solY )
plt.show()
giving
>> [0.8627445 3.55774814]
>> [0.77758636 2.52613376]
>> [0.76712184 2.1181137 ]
>> [0.76874125 2.01910211]
>> [0.7695663 2.00262339]
and
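As a side note, here is a minimal sketch of the offset variant mentioned above, building on the g and cost_function definitions from the code block; the offset argument and the value used below are assumptions for illustration only:

def cost_function_offset( params, xData, yData, slope, val, offset ):
    # same cost as above, but the penalty already turns on when a data point
    # gets within |offset| below the curve, pushing the envelope further up
    a, s = params
    area = 0.5 * np.sqrt( np.pi ) * a * s
    diff = yData - g( xData, a, s )
    cDiff = val * ( 1 + np.tanh( slope * ( diff - offset ) ) )
    return np.concatenate( [ [ area ], cDiff ] )

# e.g. leastsq( cost_function_offset, sol[-1],
#               args=( xData, yData, 1e3, 1, -0.02 ) )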

Here is a different approach using scipy's differential_evolution module combined with a "brick wall": if any predicted value during the fit falls below the corresponding Y value (i.e. the curve would dip under a data point), the fitting error is made extremely large. I have shamelessly poached code from the answer of @mikuszefski to generate the data used in this example.
import numpy as np
import matplotlib.pyplot as plt
import warnings
from scipy.optimize import differential_evolution

def g( x, a, s ):
    return a * np.exp( -x**2 / s**2 )

xData = np.linspace( -5, 5, 500 )
yData = np.fromiter( ( g( x, .77, 2 ) * np.sin( 257.7 * x )**2 for x in xData ), float )

def Gauss(x, a, x0, sigma, offset):
    return a * np.exp(-np.power(x - x0, 2) / (2 * np.power(sigma, 2))) + offset

# function for genetic algorithm to minimize (sum of squared error)
def sumOfSquaredError(parameterTuple):
    warnings.filterwarnings("ignore")  # do not print warnings by genetic algorithm
    val = Gauss(xData, *parameterTuple)
    multiplier = 1.0
    for i in range(len(val)):
        if val[i] < yData[i]:  # ****** brick wall ******
            multiplier = 1.0E10
    return np.sum((multiplier * (yData - val)) ** 2.0)

def generate_Initial_Parameters():
    # min and max used for bounds
    maxX = max(xData)
    minX = min(xData)
    maxY = max(yData)
    minY = min(yData)
    minData = min(minX, minY)
    maxData = max(maxX, maxY)

    parameterBounds = []
    parameterBounds.append([minData, maxData])  # parameter bounds for a
    parameterBounds.append([minData, maxData])  # parameter bounds for x0
    parameterBounds.append([minData, maxData])  # parameter bounds for sigma
    parameterBounds.append([minData, maxData])  # parameter bounds for offset

    # "seed" the numpy random number generator for repeatable results
    result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3, polish=False)
    return result.x

# generate initial parameter values
geneticParameters = generate_Initial_Parameters()

# create values for display of fitted function
y_fit = Gauss(xData, *geneticParameters)

plt.scatter(xData, yData, s=1)  # plot the raw data
plt.plot(xData, y_fit)          # plot the equation using the fitted parameters
plt.show()
print('parameters:', geneticParameters)
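As a side note, the brick-wall test can also be written with numpy instead of the explicit loop; this is a sketch of equivalent behaviour, reusing Gauss, xData and yData from above, and not part of the original answer:

def sumOfSquaredError_vec(parameterTuple):
    # blow up the error if the candidate curve dips below any data point
    val = Gauss(xData, *parameterTuple)
    multiplier = 1.0E10 if np.any(val < yData) else 1.0
    return np.sum((multiplier * (yData - val)) ** 2.0)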

Related

How to make a piecewise linear fit in Python with some constant pieces?

I'm trying to make a piecewise linear fit consisting of 3 pieces, of which the first and last pieces are constant. As you can see in this figure,
I don't get the expected fit, since the fit doesn't capture the 3 linear pieces that are clearly visible in the original data points.
I've tried following this question and expanding it to the case of 3 pieces with two constant pieces, but I must have done something wrong.
Here is my code:
from scipy import optimize
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

plt.rcParams['figure.figsize'] = [16, 6]

x = np.arange(0, 50, dtype=float)
y = np.array([50 for i in range(10)]
             + [50 - (50 - 5) / 31 * i for i in range(1, 31)]
             + [5 for i in range(10)],
             dtype=float)

def piecewise_linear(x, x0, y0, x1, y1):
    return np.piecewise(x,
                        [x < x0, (x >= x0) & (x < x1), x >= x1],
                        [lambda x: y0, lambda x: (y1 - y0) / (x1 - x0) * (x - x0) + y0, lambda x: y1])

p, e = optimize.curve_fit(piecewise_linear, x, y)

xd = np.linspace(0, 50, 101)
plt.plot(x, y, "o", label='original data')
plt.plot(xd, piecewise_linear(xd, *p), label='piecewise linear fit')
plt.legend()
The accepted answer to the previously mentioned question suggests looking at segments_fit.ipynb for the case of N parts, but following that approach it doesn't seem that I can specify that the first and last pieces should be constant.
Furthermore, I get the following warning:
OptimizeWarning: Covariance of the parameters could not be estimated
What am I doing wrong?
You could directly copy the segments_fit implementation
import numpy as np
from scipy import optimize

def segments_fit(X, Y, count):
    xmin = X.min()
    xmax = X.max()

    seg = np.full(count - 1, (xmax - xmin) / count)

    px_init = np.r_[np.r_[xmin, seg].cumsum(), xmax]
    py_init = np.array([Y[np.abs(X - x) < (xmax - xmin) * 0.01].mean() for x in px_init])

    def func(p):
        seg = p[:count - 1]
        py = p[count - 1:]
        px = np.r_[np.r_[xmin, seg].cumsum(), xmax]
        return px, py

    def err(p):
        px, py = func(p)
        Y2 = np.interp(X, px, py)
        return np.mean((Y - Y2)**2)

    r = optimize.minimize(err, x0=np.r_[seg, py_init], method='Nelder-Mead')
    return func(r.x)
Then you apply it as follows
import numpy as np

# mimic your data
x = np.linspace(0, 50)
y = 50 - np.clip(x, 10, 40)

# apply the segment fit
fx, fy = segments_fit(x, y, 3)
This will give you (fx, fy), the corners of your piecewise fit. Let's plot it:
import matplotlib.pyplot as plt
# show the results
plt.figure(figsize=(8, 3))
plt.plot(fx, fy, 'o-')
plt.plot(x, y, '.')
plt.legend(['fitted line', 'given points'])
EDIT: Introducing constant segments
As mentioned in the comments the above example doesn't guarantee that the output will be constant in the end segments.
Based on this implementation, the easiest way I can think of is to restrict func(p) to do that; a simple way to ensure a segment is constant is to set y[i+1] == y[i]. Thus I added xanchors and yanchors. If you give an array with repeated indices, you can bind multiple points to the same value.
from scipy import optimize

def segments_fit(X, Y, count, xanchors=slice(None), yanchors=slice(None)):
    xmin = X.min()
    xmax = X.max()

    seg = np.full(count - 1, (xmax - xmin) / count)

    px_init = np.r_[np.r_[xmin, seg].cumsum(), xmax]
    py_init = np.array([Y[np.abs(X - x) < (xmax - xmin) * 0.01].mean() for x in px_init])

    def func(p):
        seg = p[:count - 1]
        py = p[count - 1:]
        px = np.r_[np.r_[xmin, seg].cumsum(), xmax]
        py = py[yanchors]
        px = px[xanchors]
        return px, py

    def err(p):
        px, py = func(p)
        Y2 = np.interp(X, px, py)
        return np.mean((Y - Y2)**2)

    r = optimize.minimize(err, x0=np.r_[seg, py_init], method='Nelder-Mead')
    return func(r.x)
I modified the data generation a little to make the effect of the change clearer.
import matplotlib.pyplot as plt
import numpy as np
# mimic your data
x = np.linspace(0, 50)
y = 50 - np.clip(x, 10, 40) + np.random.randn(len(x)) + 0.25 * x
# apply the segment fit
fx, fy = segments_fit(x, y, 3)
plt.plot(fx, fy, 'o-')
plt.plot(x, y, '.k')
# apply the segment fit with some consecutive points having the
# same anchor
fx, fy = segments_fit(x, y, 3, yanchors=[1,1,2,2])
plt.plot(fx, fy, 'o--r')
plt.legend(['fitted line', 'given points', 'with const segments'])
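Applied to the staircase data from the question (regenerated here for illustration; this sketch builds on the segments_fit version above and is not part of the original answer), the same yanchors trick forces the first and last pieces to be flat:

import numpy as np
import matplotlib.pyplot as plt

# rebuild the 3-piece data from the question
x = np.arange(0, 50, dtype=float)
y = np.array([50] * 10
             + [50 - (50 - 5) / 31 * i for i in range(1, 31)]
             + [5] * 10, dtype=float)

# pieces 1 and 3 are forced constant by repeating their corner indices
fx, fy = segments_fit(x, y, 3, yanchors=[1, 1, 2, 2])

plt.plot(x, y, '.k')
plt.plot(fx, fy, 'o-r')
plt.show()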
You can get a one-line solution (not counting the import) using univariate splines of degree one, like this:
from scipy.interpolate import UnivariateSpline
f = UnivariateSpline(x,y,k=1,s=0)
Here k=1 means we interpolate using polynomials of degree one, aka lines. s is the smoothing parameter. It decides how much you want to compromise on the fit to avoid using too many segments. Setting it to zero means no compromises, i.e. the line HAS to go through all points. See the documentation.
Then
plt.plot(x, y, "o", label='original data')
plt.plot(x, f(x), label='linear interpolation')
plt.legend()
plt.savefig("out.png", dpi=300)
gives
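If the data is noisy, strict interpolation with s=0 would produce one segment per point. A positive smoothing factor lets the spline use far fewer knots; the following is only a rough sketch, and the noise level and the value chosen for s are assumptions for illustration:

from scipy.interpolate import UnivariateSpline
import numpy as np
import matplotlib.pyplot as plt

# noisy version of the data, for illustration only
x = np.arange(0, 50, dtype=float)
y = 50 - np.clip(x, 10, 40) + np.random.normal(scale=0.5, size=x.size)

# s bounds the allowed sum of squared residuals; larger s -> fewer knots
f = UnivariateSpline(x, y, k=1, s=len(x) * 0.5**2)

plt.plot(x, y, "o", label='noisy data')
plt.plot(x, f(x), label='smoothed piecewise linear fit')
plt.legend()
plt.show()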
This I consider a funny non-linear approach that works quite well.
Note that even though this is highly non-linear, it approximates the linear behavior very well. Moreover, the fit parameters provide the linear results directly. Only for the offset b is a small transformation, with the corresponding error propagation, required. (Also, I don't care about the value of p as long as it is somewhat larger than 5.)
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit

np.set_printoptions( linewidth=250, precision=4 )

### piecewise linear function for data generation
def pwl( x, m, b, a1, a2 ):
    if x < a1:
        out = pwl( a1, m, b, a1, a2 )
    elif x > a2:
        out = pwl( a2, m, b, a1, a2 )
    else:
        out = m * x + b
    return out

### non-linear approximation
def func( x, m, b, a1, a2, p ):
    out = b + np.log(
        1 / ( 1 + np.exp( -m * ( x - a1 ) )**p )
    ) / p - np.log(
        1 / ( 1 + np.exp( -m * ( x - a2 ) )**p )
    ) / p
    return out

### some data
nn = 36
xdata = np.linspace( -5, 19, nn )
ydata = np.fromiter( ( pwl( x, -2.1, 11.6, -1.1, 12.7 ) for x in xdata ), float )
ydata += np.random.normal( size=nn, scale=0.2 )

### dense grid for plotting
xth = np.linspace( -5, 19, 150 )

### fitting
popt, cov = curve_fit( func, xdata, ydata, p0=[ -2, 11, -1, 10, 1 ] )
mF, betaF, a1F, a2F, pF = popt
bF = betaF - mF * a1F
sol = ( mF, bF, a1F, a2F, pF )

### transforming the covariance due to the b' -> b mapping
J1 = np.identity( 5 )
J1[1,0] = -popt[2]
J1[1,2] = -popt[0]
cov2 = np.dot( J1, np.dot( cov, np.transpose( J1 ) ) )

### results
print( cov2 )
for i, v in enumerate( ( "m", "b", "a1", "a2", "p" ) ):
    print( "{:>2} = {:+2.4e} ± {:0.4e}".format( v, sol[i], np.sqrt( cov2[i,i] ) ) )

### plotting
fig = plt.figure()
ax = fig.add_subplot( 1, 1, 1 )
ax.plot( xdata, ydata, ls='', marker='+' )
ax.plot( xth, func( xth, -2, 11, -1, 10, 1 ) )
ax.plot( xth, func( xth, *popt ) )
plt.show()
Providing
[[ 1.3553e-04 -7.6291e-04 -4.3488e-04 4.5624e-04 1.2619e-01]
[-7.6291e-04 6.4126e-03 3.4560e-03 -1.5573e-03 -7.4983e-01]
[-4.3488e-04 3.4560e-03 3.4741e-03 -9.8284e-04 -4.2344e-01]
[ 4.5624e-04 -1.5573e-03 -9.8284e-04 3.0842e-03 -5.2739e+00]
[ 1.2619e-01 -7.4983e-01 -4.2344e-01 -5.2739e+00 3.1583e+05]]
m = -2.0810e+00 ± 9.7718e-03
b = +1.1463e+01 ± 6.7217e-02
a1 = -1.2545e+00 ± 5.0384e-02
a2 = +1.2739e+01 ± 4.7176e-02
p = +1.6840e+01 ± 2.9872e+02
and
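For clarity, the intercept transformation and its error propagation used in the code can be spelled out. In the central region the model reduces to a straight line $b' + m ( x - a_1 )$, so the true intercept is

$$ b = b' - m\,a_1, \qquad \mathrm{cov}_{\text{new}} = J\,\mathrm{cov}\,J^{T}, $$

where $J$ is the identity except for the row belonging to $b$, with $\partial b/\partial m = -a_1$ and $\partial b/\partial a_1 = -m$; this is exactly what the matrix J1 implements.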

How to fix the "OptimizeWarning: Covariance of the parameters could not be estimated" for Scipy.optimize curve_fit?

I was trying to fit a curve to my designed points using the curve_fit function. However, I got the warning message and no data output from curve_fit. Please find the attached screenshots below.
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import numpy as np

#Fitting function
def func(x, a, b):
    print("x: "+str(x))
    print("b: "+str(b))
    #return a*np.exp(b*x)
    #return a*x+b
    return a*x/(b+x)

#Experimental x and y data points
xData = np.array([ 31875.324,31876.35,31877.651,31878.859,31879.771,31880.657,31881.617,31882.343, \
                   31883.099,31883.758,31884.489,31885.311,31886.084,31886.736,31887.582,31888.262, \
                   31888.908,31889.627,31890.312,31890.989,31891.534,31892.142,31892.759,31893.323, \
                   31893.812,31894.397,31894.928,31895.555,31896.211,31896.797,31898.16,31898.761, \
                   31899.462,31900.099,31900.609,31901.197,31901.815,31902.32,31902.755,31903.235, \
                   31903.698,31904.232,31904.776,31905.291,31905.806,31906.182,31906.533,31906.843, \
                   31907.083,31907.175,31822.221,31833.14,31846.066,31860.254,31875.324], dtype=np.longdouble)

yData = np.array([ 7999.026,7999.483,8000.048,8000.559,8000.937,8001.298,8001.683,8001.969,8002.263, \
                   8002.516,8002.793,8003.101,8003.387,8003.625,8003.931,8004.174,8004.403,8004.655, \
                   8004.892,8005.125,8005.311,8005.517,8005.725,8005.913,8006.076,8006.269,8006.443, \
                   8006.648,8006.861,8007.05,8007.486,8007.677,8007.899,8008.1,8008.259,8008.443, \
                   8008.636,8008.793,8008.929,8009.077,8009.221,8009.386,8009.554,8009.713,8009.871, \
                   8009.987,8010.095,8010.19,8010.264,8010.293,7956.451,7969.307,7981.115,7991.074,7999.026], dtype=np.longdouble)

#Plot experimental data points
plt.plot(xData, yData, 'bo', label='experimental-data')

# Initial guess for the parameters
initialGuess = [31880.0, 8000.0]

#Perform the curve-fit
popt, pcov = curve_fit(func, xData, yData, initialGuess)
print("popt"+str(popt))

#x values for the fitted function
xFit = np.arange(0.0, 50.0, 0.001, dtype=np.longdouble)
print("xFit"+str(xFit))

#Plot the fitted function
plt.plot(xFit, func(xFit, *popt), 'r', label='fit params: a=%5.3f, b=%5.3f' % tuple(popt))
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()
The screenshot of the warning message.
The plot of the result.
Try this fit, which works. The original function will not really fit the data: for large x it converges to a, which is okay, and it drops towards smaller x, but only if b is positive. Moreover, a parameter scaling the drop rate is missing. So with a < 0 and b < 0 one gets the right shape, but it converges to a negative number, i.e. an offset is missing. Hence the use of y = a + b / ( x + c ).
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.integrate import cumtrapz  # renamed cumulative_trapezoid in newer SciPy
import numpy as np
#Fitting function
def func( x, a, b, c ):
    return a + b / ( x + c )
#Experimental x and y data points
xData = np.array([
31875.324, 31876.35, 31877.651, 31878.859, 31879.771, 31880.657,
31881.617, 31882.343, 31883.099, 31883.758, 31884.489, 31885.311,
31886.084, 31886.736, 31887.582, 31888.262, 31888.908, 31889.627,
31890.312, 31890.989, 31891.534, 31892.142, 31892.759, 31893.323,
31893.812, 31894.397, 31894.928, 31895.555, 31896.211, 31896.797,
31898.16, 31898.761, 31899.462, 31900.099, 31900.609, 31901.197,
31901.815, 31902.32, 31902.755, 31903.235, 31903.698, 31904.232,
31904.776, 31905.291, 31905.806, 31906.182, 31906.533, 31906.843,
31907.083, 31907.175, 31822.221, 31833.14, 31846.066, 31860.254,
31875.324
], dtype=float )
yData = np.array([
7999.026, 7999.483, 8000.048, 8000.559, 8000.937, 8001.298,
8001.683, 8001.969, 8002.263, 8002.516, 8002.793, 8003.101,
8003.387, 8003.625, 8003.931, 8004.174, 8004.403, 8004.655,
8004.892, 8005.125, 8005.311, 8005.517, 8005.725, 8005.913,
8006.076, 8006.269, 8006.443, 8006.648, 8006.861, 8007.05,
8007.486, 8007.677, 8007.899, 8008.1, 8008.259, 8008.443,
8008.636, 8008.793, 8008.929, 8009.077, 8009.221, 8009.386,
8009.554, 8009.713, 8009.871, 8009.987, 8010.095, 8010.19,
8010.264, 8010.293, 7956.451, 7969.307, 7981.115, 7991.074,
7999.026
], dtype=float )
#x values for the fitted function
xFit = np.linspace( 31820, 31950, 150 )
### sorting and shifting to get reasonable values
### scaling would also be a good idea, but I skip that part here
mx=np.mean( xData )
my=np.mean( yData )
data = np.column_stack( ( xData - mx , yData - my ) )
data = np.sort( data, axis=0 )
### getting good starting values using the fact that
### int x y = ( b + a c) x + a/2 x^2 - c * ( int y ) + const
### With two simple and fast numerical integrals, hence, we get a good
### guess for a, b, c from a simple linear fit.
Sy = cumtrapz( data[:, 1], data[:, 0], initial = 0 )
Sxy = cumtrapz( data[:, 0] * data[:, 1], data[:, 0], initial = 0 )
ST = np.array([
data[:, 0], data[:, 0]**2, Sy, np.ones( len( data ) )
])
S = np.transpose( ST )
A = np.dot( ST, S )
eta = np.dot( ST, Sxy )
sol = np.linalg.solve( A, eta )
a = 2 * sol[1]
c = -sol[2]
b = sol[0] - a * c
print( "a = {}".format( a ) )
print( "b = {}".format( b ) )
print( "c = {}".format( c ) )
sol, cov = curve_fit( func, xData, yData, p0 = (a + my, b, c - mx) )
print( "linear fit vs non-linear fit:" )
print( a + my, b, c - mx )
print( sol )
fig = plt.figure()
ax = fig.add_subplot( 1, 1, 1 )
ax.plot( xData, yData, ls='', marker ='+', label="data")
ax.plot( xFit, func( xFit, a + my, b, c - mx), ls=':', label="linear guess")
ax.plot( xFit, func( xFit, *sol ), label="non-linear fit" )
ax.legend( loc=0 )
plt.show()
Providing
[ 8054 -6613 -31754]
and
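The integral identity used above for the starting values follows directly from the model: multiplying $y = a + b/(x+c)$ by $(x+c)$ and integrating gives

$$ \int x\,y\,\mathrm{d}x = \frac{a}{2}\,x^{2} + (b + a c)\,x - c \int y\,\mathrm{d}x + \text{const}, $$

so a plain linear least-squares fit of $\int x\,y\,\mathrm{d}x$ against $x$, $x^2$, $\int y\,\mathrm{d}x$ and a constant (the columns of ST in the code) yields the estimates a = 2*sol[1], c = -sol[2] and b = sol[0] - a*c used as the starting guess.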

Calculating the summation parameters separately

I am trying to use curve_fit for a function of the form below:
Z = Rth * (1 - np.exp(-x / tau))
I want to calculate the first four values of the parameters Rth and tau. At the moment, it works fine if I write out the whole function like this:
Z = (a * (1 - np.exp(-x / b))) + (c * (1 - np.exp(-x / d))) + (e * (1 - np.exp(-x / f))) + (g * (1 - np.exp(-x / h)))
But this is certainly not a nice way to do it, for example if I have a really long function with more than 4 exponential terms and want to get all the parameters. How can I adjust it so that it returns a specific number of Rth and tau values after curve fitting?
For example, if I want to get 16 parameters from an 8-term exponential function, I shouldn't have to write out all 8 terms, but just a general form that gives the desired output.
Thank you.
Using least_squares it is quite simple to get an arbitrary sum of functions.
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import least_squares

def partition( inList, n ):
    return zip( *[ iter( inList ) ] * n )

def f( x, a, b ):
    return a * ( 1 - np.exp( -b * x ) )

def multi_f( x, params ):
    if len( params ) % 2:
        raise TypeError
    subparams = partition( params, 2 )
    out = np.zeros( len( x ) )
    for p in subparams:
        out += f( x, *p )
    return out

def residuals( params, xdata, ydata ):
    return multi_f( xdata, params ) - ydata

xl = np.linspace( 0, 8, 150 )
yl = multi_f( xl, ( .21, 5, 0.5, 0.1, 2.7, .01 ) )

res = least_squares( residuals, x0=( 1, .9, 1, 1, 1, 1.1 ), args=( xl, yl ) )
print( res.x )
yth = multi_f( xl, res.x )

fig = plt.figure()
ax = fig.add_subplot( 1, 1, 1 )
ax.plot( xl, yl )
ax.plot( xl, yth )
plt.show()
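The same residuals work for any number of terms; only the length of x0 changes. As a quick sketch reusing the definitions above (the four-term parameters below are made up purely for illustration):

# four (a, b) pairs instead of three; only the length of x0 changes
yl4 = multi_f( xl, ( .2, 5, .5, .1, 2.7, .01, 1.0, 0.3 ) )
res4 = least_squares( residuals, x0=np.ones( 8 ), args=( xl, yl4 ) )
print( res4.x )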
I managed to solve it in the following way; maybe not the smartest way, but it works for me.

def func(x, *args):
    Z = 0
    for i in range(0, round(len(args) / 2)):
        Z += args[2 * i] * (1 - np.exp(-x / args[2 * i + 1]))
    return Z

Then, calling it through a wrapper with an explicit number of parameters, I can adjust how many are fitted.

def func2(x, a, b, c, d, e, f, g, h):
    return func(x, a, b, c, d, e, f, g, h)

popt, pcov = curve_fit(func2, x, y, method='trf', maxfev=100000)

and it works fine for me.
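As a side note, the fixed-arity wrapper can be avoided entirely: curve_fit accepts a function taking *args as long as an explicit p0 fixes the number of parameters, so the number of terms can be chosen at call time. A minimal sketch, with x and y being the data from the question and n = 4 an assumed term count:

import numpy as np
from scipy.optimize import curve_fit

def func(x, *args):
    # sum of terms Rth_i * (1 - exp(-x / tau_i)),
    # packed as (Rth_1, tau_1, Rth_2, tau_2, ...)
    Z = 0
    for i in range(len(args) // 2):
        Z += args[2 * i] * (1 - np.exp(-x / args[2 * i + 1]))
    return Z

n = 4                    # number of exponential terms
p0 = np.ones(2 * n)      # one (Rth, tau) pair per term
popt, pcov = curve_fit(func, x, y, p0=p0, method='trf', maxfev=100000)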

Two parameter non-linear function for modeling a 3-D surface

I'm interested in modeling this surface with a simple equation that takes in two values (x, y) and produces a z value, ideally an equation with a simple form. I have tried a monkey saddle, polynomial regression (3rd and 4th order), and also multi-linear and log-linear OLS, with some success (R² ≈ 0.99), but none are perfect, especially for the curvy part. It seems like there should be a simple model to predict this surface. Maybe a non-linear regression method. Any suggestions? Thanks!
Using Mikuszefski's suggestion seems to produce a reasonable result for the curvy bit:
While the OP is problematic in the sense that it is not really a programming question, here is my try to fit the data with a function that has a reasonably small number of parameters, i.e. 6. The code more or less shows my line of thinking in arriving at this solution. It fits the data very well, but probably has no physical meaning whatsoever.
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import numpy as np
np.set_printoptions( precision=3 )
from scipy.optimize import curve_fit
from scipy.optimize import least_squares
data0 = np.loadtxt( "XYZ.csv", delimiter=",")
data = data0.reshape(11,16,4)
tdata = np.transpose(data, axes=(1,0,2))
### first guess for the shape of the data when plotted against x
def my_p( x, a, p ):
    return a * x**p

### it turns out that the fit parameters themselves follow the above law,
### but with an offset, when plotted against z (of course)
def my_p2( x, a, p, x0 ):
    return a * ( x - x0 )**p

def my_full( x, y, asc, aexp, ash, psc, pexp, psh ):
    a = my_p2( y, asc, aexp, ash )
    p = my_p2( y, psc, pexp, psh )
    z = my_p( x, a, p )
    return z
### base lists for plotting
xl = np.linspace( 0, 30, 150 )
yl = np.linspace( 5, 200, 200 )
### setting the plots
fig = plt.figure( figsize=( 16, 12) )
ax = fig.add_subplot( 2, 3, 1 )
bx = fig.add_subplot( 2, 3, 2 )
cx = fig.add_subplot( 2, 3, 3 )
dx = fig.add_subplot( 2, 3, 4 )
ex = fig.add_subplot( 2, 3, 5 )
fx = fig.add_subplot( 2, 3, 6, projection='3d' )
### fitting the data linewise for different z as function of x
### keeping track of the fit parameters
adata = list()
pdata = list()
for subdata in data:
    xdata = subdata[::, 1]
    zdata = subdata[::, 3]
    sol, _ = curve_fit( my_p, xdata, zdata )
    print( sol, subdata[0, 2] ) ### fit parameters for different z
    adata.append( [ subdata[0, 2], sol[0] ] )
    pdata.append( [ subdata[0, 2], sol[1] ] )
    ### plotting the z-cuts
    ax.plot( xdata, zdata, ls='', marker='o' )
    ax.plot( xl, my_p( xl, *sol ) )
adata = np.array( adata )
pdata = np.array( pdata )
ax.scatter( [0],[0] )
ax.grid()
### fitting the fit parameters as a function of z
sola, _ = curve_fit( my_p2, adata[::,0], adata[::,1], p0= ( 1, -0.05,0 ) )
print( sola )
bx.plot( *(adata.transpose() ) )
bx.plot( yl, my_p2( yl, *sola))
solp, _ = curve_fit( my_p2, pdata[::,0], pdata[::,1], p0= ( 1, -0.05,0 ) )
print( solp )
cx.plot( *(pdata.transpose() ) )
cx.plot( yl, my_p2( yl, *solp))
### plotting the cuts applying the results from the "fit of fits"
for subdata in data:
    xdata = subdata[::, 1]
    y0 = subdata[0, 2]
    zdata = subdata[::, 3]
    dx.plot( xdata, zdata, ls='', marker='o' )
    dx.plot(
        xl,
        my_full(
            xl, y0, 2.12478827, -0.187, -20.84, 0.928, -0.0468, 0.678
        )
    )
### now fitting the entire data with the overall 6 parameter function
def residuals( params, alldata ):
    asc, aexp, ash, psc, pexp, psh = params
    diff = list()
    for data in alldata:
        x = data[1]
        y = data[2]
        z = data[3]
        zth = my_full( x, y, asc, aexp, ash, psc, pexp, psh )
        diff.append( z - zth )
    return diff
## and fitting using the hand-made residual function and least_squares
resultfinal = least_squares(
residuals,
x0 =( 2.12478827, -0.187, -20.84, 0.928, -0.0468, 0.678 ),
args = ( data0, ) )
### least_squares does not provide errors but the approximated jacobian
### so we follow:
### https://stackoverflow.com/q/61459040/803359
### https://stackoverflow.com/q/14854339/803359
print( resultfinal.x)
resi = resultfinal.fun
JMX = resultfinal.jac
HMX = np.dot( JMX.transpose(),JMX )
cov_red = np.linalg.inv( HMX )
s_sq = np.sum( resi**2 ) /( len(data0) - 6 )
cov = cov_red * s_sq
print( cov )
### plotting the cuts with the overall fit
for subdata in data:
    xdata = subdata[::, 1]
    y0 = subdata[0, 2]
    zdata = subdata[::, 3]
    ex.plot( xdata, zdata, ls='', marker='o' )
    ex.plot( xl, my_full( xl, y0, *resultfinal.x ) )
### and in 3d, which is actually not very helpful partially due to the
### fact that matplotlib has limited 3d capabilities.
XX, YY = np.meshgrid( xl, yl )
ZZ = my_full( XX, YY, *resultfinal.x )
fx.scatter(
data0[::,1], data0[::,2], data0[::,3],
color="#ff0000", alpha=1 )
fx.plot_wireframe( XX, YY, ZZ , cmap='inferno')
plt.show()
Providing
[1.154 0.866] 5.0
[1.126 0.837] 10.0
[1.076 0.802] 20.0
[1.013 0.794] 30.0
[0.975 0.789] 40.0
[0.961 0.771] 50.0
[0.919 0.754] 75.0
[0.86 0.748] 100.0
[0.845 0.738] 125.0
[0.816 0.735] 150.0
[0.774 0.726] 200.0
[ 2.125 -0.186 -20.841]
[ 0.928 -0.047 0.678]
[ 1.874 -0.162 -13.83 0.949 -0.052 -1.228]
[[ 6.851e-03 -7.413e-04 -1.737e-01 -6.914e-04 1.638e-04 5.367e-02]
[-7.413e-04 8.293e-05 1.729e-02 8.103e-05 -2.019e-05 -5.816e-03]
[-1.737e-01 1.729e-02 5.961e+00 1.140e-02 -2.272e-03 -1.423e+00]
[-6.914e-04 8.103e-05 1.140e-02 1.050e-04 -2.672e-05 -6.100e-03]
[ 1.638e-04 -2.019e-05 -2.272e-03 -2.672e-05 7.164e-06 1.455e-03]
[ 5.367e-02 -5.816e-03 -1.423e+00 -6.100e-03 1.455e-03 5.090e-01]]
and
The fit looks good and the covariance matrix also seems OK.
The final function, hence, is:
z = 1.874 / ( y + 13.83 )**0.162 * x**( 0.949 / ( y + 1.228 )**0.052 )
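For reuse, that final formula can be wrapped in a small helper; this is just a restatement of the fitted expression above, not additional output from the fit:

import numpy as np

def z_surface(x, y):
    # z = 1.874 / (y + 13.83)**0.162 * x**(0.949 / (y + 1.228)**0.052)
    return 1.874 / (y + 13.83)**0.162 * x**(0.949 / (y + 1.228)**0.052)

# example evaluation for a few x values at y = 50
print(z_surface(np.array([1.0, 10.0, 30.0]), 50.0))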

Add Power-law and exponential fit based on chi square error minimization to my PDF

Hello, as the title suggests, I have been trying to add an exponential and a power-law fit to my PDF.
As shown in this picture:
The code I am using produces the underlying graph:
The code is this one:
import matplotlib.pyplot as plt

a11=[9.76032106e-02, 6.73754187e-02, 3.20683249e-02, 2.21788509e-02,
2.70850237e-02, 9.90377323e-03, 2.11573411e-02, 8.46232347e-03,
8.49027869e-03, 7.33997745e-03, 5.71819070e-03, 4.62720448e-03,
4.11562884e-03, 3.20064313e-03, 2.66192941e-03, 1.69116510e-03,
1.94355212e-03, 2.55224949e-03, 1.23822395e-03, 5.29618250e-04,
4.03769641e-04, 3.96865740e-04, 3.38530868e-04, 2.04124701e-04,
1.63913557e-04, 2.04486864e-04, 1.82216592e-04, 1.34708400e-04,
9.24289261e-05, 9.55074181e-05, 8.13695322e-05, 5.15610541e-05,
4.15425149e-05, 4.68101099e-05, 3.33696885e-05, 1.61893058e-05,
9.61743970e-06, 1.17314090e-05, 6.65239507e-06]
b11=[3.97213201e+00, 4.77600082e+00, 5.74255432e+00, 6.90471618e+00,
8.30207306e+00, 9.98222306e+00, 1.20023970e+01, 1.44314081e+01,
1.73519956e+01, 2.08636432e+01, 2.50859682e+01, 3.01627952e+01,
3.62670562e+01, 4.36066802e+01, 5.24316764e+01, 6.30426504e+01,
7.58010432e+01, 9.11414433e+01, 1.09586390e+02, 1.31764173e+02,
1.58430233e+02, 1.90492894e+02, 2.29044305e+02, 2.75397642e+02,
3.31131836e+02, 3.98145358e+02, 4.78720886e+02, 5.75603061e+02,
6.92091976e+02, 8.32155588e+02, 1.00056488e+03, 1.20305636e+03,
1.44652749e+03, 1.73927162e+03, 2.09126048e+03, 2.51448384e+03,
3.02335795e+03, 3.63521656e+03, 4.37090138e+03]
plt.plot(b11,a11, 'ro')
plt.yscale("log")
plt.xscale("log")
plt.show()
I would like to add to the underlying graph a power-law fit at smaller times and an exponential fit for longer times, based on the chi-square error minimization method.
The data for the x axis saved in csv form:
The data for the x axis:
As mentioned in my comments, I think you can couple the power law and the exponential via a constant term. Alternatively, the data looks like it could be fitted by two power laws, although the comments suggest that there is truly an exponential behaviour. Anyhow, I show both approaches here. In both cases I try to avoid any type of piecewise definition. This also ensures $C^\infty$ smoothness.
In the first approach we have a * x**( -b ) for small x and c * exp( -d * x ) for large x. The idea is to choose a c such that the power law is much bigger than c for the required small x but significantly smaller otherwise.
This allows for the function mentioned in my comment, namely ( a * x**( -b ) + c ) * exp( -d * x ). One may consider c as a transition parameter.
In the alternative approach, I take two power laws. There are, hence, two regions: in the first, function one is smaller; in the second, function two is smaller. As I always want the smaller function, I use inverse summation, i.e. f = 1 / ( 1 / f1 + 1 / f2 ). As can be seen in the code below, I add an additional parameter k (technically in ]0, ∞[) that controls the smoothness of the transition.
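In formulas, the two candidate models read

$$ f_{\mathrm{pe}}(x) = \bigl( a\,x^{-b} + c \bigr)\,e^{-d x}, \qquad f_{\mathrm{pp}}(x) = \Bigl( \bigl( a\,x^{-b} \bigr)^{-k} + \bigl( c\,x^{-d} \bigr)^{-k} \Bigr)^{-1/k}, $$

where the second expression tends to the pointwise minimum of the two power laws as $k \to \infty$; both are fitted in log space below.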
import matplotlib.pyplot as mp
import numpy as np
from scipy.optimize import curve_fit

data = np.loadtxt( "7jyRi.txt", delimiter=',' )

#### p-e: power law and exponential coupled via a small constant term
def func_log( x, a, b, c, d ):
    return np.log10( ( a * x**( -b ) + c ) * np.exp( -d * x ) )

guess = [ .1, .8, 0.01, .005 ]
testx = np.logspace( 0, 3, 150 )
testy = np.fromiter( ( 10**func_log( x, *guess ) for x in testx ), float )

sol, _ = curve_fit( func_log, data[ ::, 0 ], np.log10( data[ ::, 1 ] ), p0=guess )
fity = np.fromiter( ( 10**func_log( x, *sol ) for x in testx ), float )

#### p-p: alternatively using two power laws
def double_power_log( x, a, b, c, d, k ):
    s1 = ( a * x**( -b ) )**k
    s2 = ( c * x**( -d ) )**k
    out = 1.0 / ( 1.0 / s1 + 1.0 / s2 )**( 1.0 / k )
    return np.log10( out )

aguess = [ .1, .8, 1e7, 4, 1 ]
atesty = np.fromiter( ( 10**double_power_log( x, *aguess ) for x in testx ), float )

asol, _ = curve_fit( double_power_log, data[ ::, 0 ], np.log10( data[ ::, 1 ] ), p0=aguess )
afity = np.fromiter( ( 10**double_power_log( x, *asol ) for x in testx ), float )

#### plotting
fig = mp.figure( figsize=( 10, 8 ) )
ax = fig.add_subplot( 1, 1, 1 )
ax.plot( data[ ::, 0 ], data[ ::, 1 ], ls='', marker='o', label="data" )
ax.plot( testx, testy, ls=':', label="guess p-e" )
ax.plot( testx, atesty, ls=':', label="guess p-p" )
ax.plot( testx, fity, ls='-', label="fit p-e: {}".format( sol ) )
ax.plot( testx, afity, ls='-', label="fit p-p: {}".format( asol ) )
ax.set_xscale( "log" )
ax.set_yscale( "log" )
ax.set_xlim( [ 5e-1, 2e3 ] )
ax.set_ylim( [ 1e-5, 2e-1 ] )
ax.legend( loc=0 )
mp.show()
The results look like
For completeness I'd like to add a solution with a piecewise definition. As I want the function to be continuous and differentiable, the parameters of the exponential law are not completely free. With f = a * x**(-b) and g = alpha * exp( -beta * x ) and a transition at x0, I choose ( a, b, x0 ) as free parameters; from these, alpha and beta follow. The equations have no easy solution though, so this itself requires a minimization.
import matplotlib.pyplot as mp
import numpy as np
from scipy.optimize import curve_fit
from scipy.optimize import minimize
from scipy.special import lambertw

data = np.loadtxt( "7jyRi.txt", delimiter=',' )

def pwl( x, a, b ):
    return a * x**( -b )

def expl( x, a, b ):
    return a * np.exp( -b * x )

def alpha_fun( alpha, a, b, x0 ):
    out = alpha - pwl( x0, a, b ) * expl( 1, 1, lambertw( pwl( x0, -a * b / alpha, b ) ) )
    return 1e10 * np.abs( out )**2

def p_w( v, a, b, alpha, beta, x0 ):
    if v < x0:
        out = pwl( v, a, b )
    else:
        out = expl( v, alpha, beta )
    return np.log10( out )

def alpha_beta( x, a, b, x0 ):
    """
    continuous and differentiable: alpha and beta are defined by (a, b)
    and the free parameter x0, the point where the two laws connect
    """
    ### attention: strongly depends on the starting guess, i.e. might fail catastrophically
    sol = minimize( alpha_fun, .005, args=( a, b, x0 ) )
    alpha = sol.x[0]
    beta = np.real( -lambertw( pwl( x0, -a * b / alpha, b ) ) / x0 )
    if isinstance( x, ( np.ndarray, list, tuple ) ):
        out = list()
        for v in x:
            out.append( p_w( v, a, b, alpha, beta, x0 ) )
    else:
        out = p_w( x, a, b, alpha, beta, x0 )
    return out

sol, _ = curve_fit( alpha_beta, data[ ::, 0 ], np.log10( data[ ::, 1 ] ), p0=[ .1, .8, 70. ] )
alpha0 = minimize( alpha_fun, .005, args=tuple( sol ) ).x[0]
beta0 = np.real( -lambertw( pwl( sol[2], -sol[0] * sol[1] / alpha0, sol[1] ) ) / sol[2] )

xl = np.logspace( 0, 3, 100 )
yl = alpha_beta( xl, *sol )
pl = pwl( xl, sol[0], sol[1] )
el = expl( xl, alpha0, beta0 )

#### plotting
fig = mp.figure( figsize=( 10, 8 ) )
ax = fig.add_subplot( 1, 1, 1 )
ax.plot( data[ ::, 0 ], data[ ::, 1 ], ls='', marker='o', label="data" )
ax.plot( xl, pl, ls=':', label="p" )
ax.plot( xl, el, ls=':', label="{:0.3e} exp(-{:0.3e} x)".format( alpha0, beta0 ) )
ax.plot( xl, [ 10**y for y in yl ], ls='-', label="sol: {}".format( sol ) )
ax.axvline( sol[-1], color='k', ls=':' )
ax.set_xscale( "log" )
ax.set_yscale( "log" )
ax.set_xlim( [ 5e-1, 2e3 ] )
ax.set_ylim( [ 1e-5, 2e-1 ] )
ax.legend( loc=0 )
mp.show()
Eventually providing
