I would like to build a Monte Carlo probabilistic model for structural analysis. In order to do so, I need to graph this model:
I worked out the following code, but it still needs a lot of work:
import pandas as pd
from matplotlib import pyplot
import numpy as np
from scipy.optimize import curve_fit
from numpy import arange
%matplotlib inline
# define the true objective function
def objective(x, a, b, c, d, e, f):
    return (a * x) + (b * x**2) + (c * x**3) + (d * x**4) + (e * x**5) + f
y = np.array([1,0.99,0.97,0.93,0.9,0.81,0.7,0.57,0.5,0.32,0.25])
x = np.array([0,0.2,0.4,0.6,0.67,.8,0.9,1.0,1.05,1.2,1.32])
popt, _ = curve_fit(objective, x, y)
a, b, c, d, e, f = popt
pyplot.scatter(x, y)
# define a sequence of inputs between the smallest and largest known inputs
x_line = arange(min(x), max(x), 0.1)
# calculate the output for the range
y_line = objective(x_line, a, b, c, d, e, f)
# create a line plot for the mapping function
pyplot.plot(x_line, y_line, '--', color='red')
pyplot.show()
Can you help me fix this code so the curve_fit works properly?
How can I determine whether a random point will fall inside the curve?
To get the curve to attach to the Y axis on the left, one way would be to set the X axis minimum to be the same as the smallest X axis value that you have (in this case, zero). matplotlib.pyplot.xlim
To close the right side of the plot, you can plot a vertical line based on the min/max of your data set. matplotlib.pyplot.vlines
While perhaps an overly simplistic view of your problem, one way would be to simply compare the point in question to the ranges of your dataset, e.g. for a test point pt = (x_val, y_val):
min(y) <= pt[1] <= max(y)
The following code shows each example, but doesn't take the time to make it necessarily as Pythonic as it could be (written literally for illustration).
Code:
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import pandas as pd
from matplotlib import pyplot
import numpy as np
from scipy.optimize import curve_fit
from numpy import arange
# %matplotlib inline
# define the true objective function
def objective(x, a, b, c, d, e, f):
    return (a * x) + (b * x**2) + (c * x**3) + (d * x**4) + (e * x**5) + f
y = np.array([1,0.99,0.97,0.93,0.9,0.81,0.7,0.57,0.5,0.32,0.25])
x = np.array([0,0.2,0.4,0.6,0.67,.8,0.9,1.0,1.05,1.2,1.32])
popt, _ = curve_fit(objective, x, y)
a, b, c, d, e, f = popt
pyplot.scatter(x, y)
# define a sequence of inputs between the smallest and largest known inputs
x_line = arange(min(x), max(x), 0.1)
# calculate the output for the range
y_line = objective(x_line, a, b, c, d, e, f)
# create a line plot for the mapping function
pyplot.plot(x_line, y_line, '--', color='red')
# Set X axis limits
pyplot.xlim(min(x),)
# Set Y axis limits
pyplot.ylim(0,)
# Close the curve on the right
pyplot.vlines(max(x), min(y), 0, linestyles='--', color='red')
# Value within range? (named pt so the fit coefficient `a` isn't shadowed)
pt = (0.15, 0.63)
in_x = min(x) <= pt[0] <= max(x)
in_y = min(y) <= pt[1] <= max(y)
if in_x and in_y:
    print('True')
# Plot test point
pyplot.plot(pt[0], pt[1], marker='o', markersize=5, color="blue")
pyplot.show()
Output:
True
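If what you actually need for the Monte Carlo step is whether a random point lies under the fitted curve (rather than merely inside the bounding box of the data), one way is to evaluate the fitted polynomial at the point's x value. A minimal sketch, reusing objective and popt from the code above (the test point is illustrative):
# Accept the point if it lies between y = 0 and the fitted curve,
# within the x range of the data
pt = (0.15, 0.63)
inside = (min(x) <= pt[0] <= max(x)) and (0 <= pt[1] <= objective(pt[0], *popt))
print(inside)
For Monte Carlo integration you would draw many such points uniformly over the bounding box and count the fraction accepted.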
Related
I was following a tutorial on data fitting, and when I simply replaced the original data with my own data, the fit no longer looked quadratic.
Here's my code; thanks a lot for any help:
# fit a second degree polynomial to the economic data
import numpy as np
from numpy import arange
from pandas import read_csv
from scipy.optimize import curve_fit
from matplotlib import pyplot
x = np.array([1,2,3,4,5,6])
y = np.array([1,4,12,29,54,104])
# define the true objective function
def objective(x, a, b, c):
    return a * x + b * x**2 + c
# load the dataset
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/longley.csv'
dataframe = read_csv(url, header=None)
data = dataframe.values
# choose the input and output variables
# curve fit
popt, _ = curve_fit(objective, x, y)
# summarize the parameter values
a, b, c = popt
print('y = %.5f * x + %.5f * x^2 + %.5f' % (a, b, c))
# plot input vs output
pyplot.scatter(x, y)
# define a sequence of inputs between the smallest and largest known inputs
x_line = arange(min(x), max(x), 1)
# calculate the output for the range
y_line = objective(x_line, a, b, c)
# create a line plot for the mapping function
pyplot.plot(x_line, y_line, '--', color='red')
pyplot.show()
I searched for "python matplotlib quadratic data fit"; I expected a quadratic-looking curve, but visually it isn't.
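A likely cause, in the same vein as the linspace answer further below: arange(min(x), max(x), 1) evaluates the fitted polynomial at only five integer points (arange excludes the endpoint), so the plotted "curve" is a few straight segments rather than a smooth parabola. A minimal sketch of the fix, reusing the objective and coefficients from the code above:
# Evaluate the fitted quadratic on a dense, endpoint-inclusive grid
x_line = np.linspace(min(x), max(x), 200)
y_line = objective(x_line, a, b, c)
pyplot.plot(x_line, y_line, '--', color='red')
Note also that this data roughly doubles at each step, so it grows faster than quadratically; a second-degree polynomial may be a poor model regardless of how it is plotted.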
I am trying to fit a curve to some data, but the resulting curve looks like a scrambled mess, and I don't know whether the coefficients are accurate. With this sample data set it plots something like a triangle, and with my original data set it looks even worse. The code is mostly from a tutorial. I tried removing the sympy code from an alternate tutorial, but that accomplished nothing.
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import numpy as np
import sympy as sym
x = [0.0009425070688029959,
0.0009398496240601303,
0.0018779342723004293,
0.004694835680751241,
0.0009425070688029959,
0.004734848484848552,
0.0018993352326685255,
0.0009460737937558928]
y = [0.0028301886792453904,
0.003762935089369628,
0.001881467544684814,
0.0009433962264150743,
0.0028301886792453904,
0.0019029495718363059,
0.0038058991436727804,
0.0018939393939393534]
"""
Plot your data
"""
plt.plot(x, y, 'ro',label="Original Data")
"""
brutal force to avoid errors
"""
x = np.array(x, dtype=float) #transform your data in a numpy array of floats
y = np.array(y, dtype=float) #so the curve_fit can work
"""
create a function to fit with your data. a, b, c and d are the coefficients
that curve_fit will calculate for you.
In this part you need to guess and/or use mathematical knowledge to find
a function that resembles your data
"""
def func(x, b, c, d):
    return b * x * x + c * x + d
"""
make the curve_fit
"""
popt, pcov = curve_fit(func, x, y)
"""
The result is:
popt[0] = b, popt[1] = c and popt[2] = d of the function,
so f(x) = popt[0]*x**2 + popt[1]*x + popt[2].
"""
print("b = " + str(popt[0]) + " c = " + str(popt[1]) + " d = " + str(popt[2]))
"""t
Use sympy to generate the latex sintax of the function
"""
xs = sym.Symbol(r'\lambda')
tex = sym.latex(func(xs,*popt)).replace('$', '')
plt.title(r'$f(\lambda)= %s$' %(tex),fontsize=16)
"""
Print the coefficients and plot the function.
"""
plt.plot(x, func(x, *popt), label="Fitted Curve")  # same as the commented-out line below
#plt.plot(x, popt[0]*x**2 + popt[1]*x + popt[2], label="Fitted Curve")
plt.legend(loc='upper left')
plt.show()
This is because Matplotlib only draws lines between the points in your original data (the x and y arrays), in the order they are defined. There are only 3 unique x values (plus some noise), which is why you see what looks like a triangle.
The fix is to create a new array with evenly spread, and ordered, x values across the range you're interested in. You can do that with the linspace function in numpy.
For example, try this for your second plot command:
x_eval = np.linspace(min(x), max(x), 100)
plt.plot(x_eval, func(x_eval, *popt), label="Fitted Curve")
x_eval above is an array of 100 evenly spaced values between the minimum and maximum x values in your original data.
Looks like you need to sort on xdata.
Try inserting this:
x,y = zip(*sorted(zip(x, y)))
Such that
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import numpy as np
import sympy as sym
x = [0.0009425070688029959,
0.0009398496240601303,
0.0018779342723004293,
0.004694835680751241,
0.0009425070688029959,
0.004734848484848552,
0.0018993352326685255,
0.0009460737937558928]
y = [0.0028301886792453904,
0.003762935089369628,
0.001881467544684814,
0.0009433962264150743,
0.0028301886792453904,
0.0019029495718363059,
0.0038058991436727804,
0.0018939393939393534]
"""
Plot your data
"""
plt.plot(x, y, 'ro',label="Original Data")
"""
brutal force to avoid errors
"""
x,y = zip(*sorted(zip(x, y)))
x = np.array(x, dtype=float) #transform your data in a numpy array of floats
y = np.array(y, dtype=float) #so the curve_fit can work
"""
create a function to fit with your data. a, b, c and d are the coefficients
that curve_fit will calculate for you.
In this part you need to guess and/or use mathematical knowledge to find
a function that resembles your data
"""
def func(x, b, c, d):
    return b * x * x + c * x + d
"""
make the curve_fit
"""
popt, pcov = curve_fit(func, x, y)
"""
The result is:
popt[0] = b, popt[1] = c and popt[2] = d of the function,
so f(x) = popt[0]*x**2 + popt[1]*x + popt[2].
"""
print("b = " + str(popt[0]) + " c = " + str(popt[1]) + " d = " + str(popt[2]))
"""t
Use sympy to generate the latex sintax of the function
"""
xs = sym.Symbol(r'\lambda')
tex = sym.latex(func(xs,*popt)).replace('$', '')
plt.title(r'$f(\lambda)= %s$' %(tex),fontsize=16)
"""
Print the coefficients and plot the function.
"""
plt.plot(x, func(x, *popt), label="Fitted Curve")  # same as the commented-out line below
#plt.plot(x, popt[0]*x**2 + popt[1]*x + popt[2], label="Fitted Curve")
plt.legend(loc='upper left')
plt.show()
The plotted curve from the data above.
I am fitting a set of experimental data (sample) that spans two different experimental regions and can be expressed with two mathematical functions, as follows:
1st region:
y = m*x + c (the slope can be constrained to zero)
2nd region:
y = d*exp(-k*x)
The experimental data is shown below, and I coded it in Python as follows:
def func(x, m, c, d, k):
    return m*x + c + d*np.exp(-k*x)
popt, pcov = curve_fit(func, t, y)
Unfortunately, my data is not fitting properly, and the fitted (returned) parameters do not make sense (see picture below).
Any assistance will be appreciated.
Very interesting question. As said by a_guest, you will have to fit to the two regions separately. However, I think you probably also want the two regions to connect smoothly at the point t0, the point where we switch from one model to the other. In order to do this, we need to add the constraint that y1 == y2 at the point t0.
In order to do this with scipy, look at scipy.optimize.minimize with the SLSQP method. However, I wrote a scipy wrapper to make this kind of thing easier, called symfit. I will show you how to do this with symfit, because I think it's better suited to the task, but with this example you should also be able to implement it with pure scipy if you prefer.
from symfit import parameters, variables, Fit, Piecewise, exp, Eq
import numpy as np
import matplotlib.pyplot as plt
t, y = variables('t, y')
m, c, d, k, t0 = parameters('m, c, d, k, t0')
# Help the fit by bounding the switchpoint between the models
t0.min = 0.6
t0.max = 0.9
# Make a piecewise model
y1 = m * t + c
y2 = d * exp(- k * t)
model = {y: Piecewise((y1, t <= t0), (y2, t > t0))}
# As a constraint, we demand equality between the two models at the point t0
# to do this, we substitute t -> t0 and demand equality using `Eq`
constraints = [Eq(y1.subs({t: t0}), y2.subs({t: t0}))]
# Read the data
tdata, ydata = np.genfromtxt('Experimental Data.csv', delimiter=',', skip_header=1).T
fit = Fit(model, t=tdata, y=ydata, constraints=constraints)
fit_result = fit.execute()
print(fit_result)
plt.scatter(tdata, ydata)
plt.plot(tdata, fit.model(t=tdata, **fit_result.params).y)
plt.show()
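For reference, a minimal pure-scipy sketch of the same constrained fit, since the answer above points to scipy.optimize.minimize with the SLSQP method. It assumes the same CSV layout as above; the initial guesses are illustrative:
import numpy as np
from scipy.optimize import minimize

tdata, ydata = np.genfromtxt('Experimental Data.csv', delimiter=',', skip_header=1).T

# Piecewise model: linear up to the switchpoint t0, exponential after it
def piecewise(t, m, c, d, k, t0):
    return np.where(t <= t0, m * t + c, d * np.exp(-k * t))

# Objective: sum of squared residuals over the parameters p = (m, c, d, k, t0)
def sq_residuals(p):
    return np.sum((ydata - piecewise(tdata, *p)) ** 2)

# Equality constraint: continuity at t0, i.e. m*t0 + c == d*exp(-k*t0)
constraints = [{'type': 'eq',
                'fun': lambda p: p[0] * p[4] + p[1] - p[2] * np.exp(-p[3] * p[4])}]
bounds = [(None, None)] * 4 + [(0.6, 0.9)]  # bound the switchpoint as above

res = minimize(sq_residuals, x0=[0.0, 1.0, 1.0, 1.0, 0.75], method='SLSQP',
               bounds=bounds, constraints=constraints)
print(res.x)  # m, c, d, k, t0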
Since your data shows different behavior in different regions, you also need to fit the data on these different regions. That is, instead of fitting the sum of the two models (functions), you should fit y = m*x + c on the left region and, separately, y = d*exp(-k*x) on the right region. If you have trouble finding the boundary of the two regions, you could assess it by comparing the goodness of fit.
popt_1, pcov_1 = curve_fit(lambda x, m, c: m*x + c, t[t < 0.8], y[t < 0.8], p0=(1, 0))
popt_2, pcov_2 = curve_fit(lambda x, d, k: d*np.exp(-k*x), t[t >= 0.8], y[t >= 0.8], p0=(400, 1))
Edit
Example code:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
df = pd.read_csv('test.csv', index_col=None)
t = df.t.values
y = df.Y.values
boundary = t[y.argmax()]
t1 = t[t < boundary]
y1 = y[t < boundary]
t2 = t[t >= boundary]
y2 = y[t >= boundary]
f1 = lambda x, m, c: m*x + c
f2 = lambda x, d, k: d*np.exp(-k*x)
popt_1, pcov_1 = curve_fit(f1, t1, y1, p0=((y1[-1] - y1[0]) / (t1[-1] - t1[0]), y1[0]))
popt_2, pcov_2 = curve_fit(f2, t2, y2, p0=(y2[0], 1))
plt.title('Fitted data on two different domains')
plt.xlabel('t [a.u.]')
plt.ylabel('y [a.u.]')
plt.plot(t, y, '-o', label='Data')
plt.plot(t1, f1(t1, *popt_1), '--', color='#ff7f0e', lw=3, label='Fit')
plt.plot(t2, f2(t2, *popt_2), '--', color='#ff7f0e', lw=3, label='_nolegend_')
plt.grid()
plt.legend()
plt.show()
Which produces the following plot:
Note that the resulting "compound" function is not continuous at the boundary. If that is undesired, you can resolve it by fixing one of the fit parameters (e.g. k) before fitting the other domain (one way or the other). Alternatively, you could fit both regions separately, then determine the value at the boundary as the average of the two separate functions (i.e. y_b = (f1(t1[-1], *popt_1) + f2(t2[0], *popt_2)) / 2) and then repeat the fitting while constraining the parameters such that this boundary condition is fulfilled.
For example, fitting the linear function first and then fixing the d parameter of the exponential in order to have a continuous transition at the boundary (note that the linear function f1 is evaluated at t2[0], just outside its domain, in order to ensure continuity):
f1 = lambda x, m, c: m*x + c
popt_1, pcov_1 = curve_fit(f1, t1, y1, p0=((y1[-1] - y1[0]) / (t1[-1] - t1[0]), y1[0]))
d = f1(t2[0], *popt_1)
f2 = lambda x, k: d*np.exp(-k*(x - boundary))
popt_2, pcov_2 = curve_fit(f2, t2, y2, p0=(1,))
Which produces the following plot:
If you would prefer to use a single equation, I found that the Hockett-Sherby equation y = b - (b-a) * exp(-c * (x**d)) seems like an OK fit to your data, yielding an R-squared of 0.99 and an RMSE of 11.2 with parameters a = 1.1262189756312683E+01, b = 3.2040596733114870E+02, c = 3.9385197507261771E-01, and d = -4.7723382040098095E+00.
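A minimal sketch of fitting that form with curve_fit, seeding the optimizer with rounded versions of the parameters reported above (t and y are the data arrays from the question; x must be strictly positive since d is negative):
import numpy as np
from scipy.optimize import curve_fit

def hockett_sherby(x, a, b, c, d):
    # y = b - (b - a) * exp(-c * x**d); requires x > 0 when d < 0
    return b - (b - a) * np.exp(-c * x**d)

p0 = (11.26, 320.4, 0.394, -4.77)  # rounded from the values reported above
popt, pcov = curve_fit(hockett_sherby, t, y, p0=p0, maxfev=10000)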
I want to fit a complex data set with two functions that share the same parameters. For this I used:
def funcReal(x, a, b, c, d):
    return np.real((a + 1j*b)*(np.exp(1j*k*x - kappa1*x) - np.exp(kappa2*x)) + (c + 1j*d)*(np.exp(-1j*k*x - kappa1*x) - np.exp(-kappa2*x)))

def funcImag(x, a, b, c, d):
    return np.imag((a + 1j*b)*(np.exp(1j*k*x - kappa1*x) - np.exp(kappa2*x)) + (c + 1j*d)*(np.exp(-1j*k*x - kappa1*x) - np.exp(-kappa2*x)))
poptReal, pcovReal = curve_fit(funcReal, x, yReal)
poptImag, pcovImag = curve_fit(funcImag, x, yImag)
Here funcReal is the real part of my model, funcImag the imaginary part, yReal the real part of the data and yImag the imaginary part of the data.
However, the two fits do not give me the same parameters for the real and imaginary parts.
My question: is there a package or a method that lets me perform simultaneous fits of multiple functions to multiple data sets with shared parameters?
To fit both the complex function given above, we can treat the real and imaginary components as a coordinate point, or as a vector. Since curve_fit doesn't care about the order at which data points are inserted in the vectors x (independent data) and y (dependent data), we can simply split the complex data and stack the real and imaginary components using hstack. See the example below.
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
k = 2 * np.pi  # assumed wavenumber; k was not defined in the original snippet
kappa1 = np.pi
kappa2 = -0.01

def long_function(x, a, b, c, d):
    return (a + 1j*b)*(np.exp(1j*k*x - kappa1*x) - np.exp(kappa2*x)) + (c + 1j*d)*(np.exp(-1j*k*x - kappa1*x) - np.exp(-kappa2*x))
def funcBoth(x, a, b, c, d):
    N = len(x)
    x_real = x[:N//2]
    x_imag = x[N//2:]
    y_real = np.real(long_function(x_real, a, b, c, d))
    y_imag = np.imag(long_function(x_imag, a, b, c, d))
    return np.hstack([y_real, y_imag])
# Create an independent variable with 100 measurements
N = 100
x = np.linspace(0, 10, N)
# True values of the dependent variable
y = long_function(x, a=1.1, b=0.3, c=-0.2, d=0.23)
# Add uniform complex noise (real + imaginary)
noise = (np.random.rand(N) + 1j * np.random.rand(N) - 0.5 - 0.5j) * 0.1
yNoisy = y + noise
# Split the measurements into a real and imaginary part
yReal = np.real(yNoisy)
yImag = np.imag(yNoisy)
yBoth = np.hstack([yReal, yImag])
# Find the best-fit solution
poptBoth, pcovBoth = curve_fit(funcBoth, np.hstack([x, x]), yBoth)
# Compute the best-fit solution
yFit = long_function(x, *poptBoth)
print(poptBoth)
# Plot the results
plt.figure(figsize=(9, 4))
plt.subplot(121)
plt.plot(x, np.real(yNoisy), "k.", label="Noisy y")
plt.plot(x, np.real(y), "r--", label="True y")
plt.plot(x, np.real(yFit), label="Best fit")
plt.ylabel("Real part of y")
plt.xlabel("x")
plt.legend()
plt.subplot(122)
plt.plot(x, np.imag(yNoisy), "k.")
plt.plot(x, np.imag(y), "r--")
plt.plot(x, np.imag(yFit))
plt.ylabel("Imaginary part of y")
plt.xlabel("x")
plt.tight_layout()
plt.show()
Result:
The best-fit parameters that were found in this example were a = 1.14, b = 0.375, c = -0.236, and d = 0.163, which are close enough to the true parameter values given the amplitude of the noise that I inserted here.
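Since curve_fit also returns the covariance matrix, a one-line addition to the example above gives one-sigma uncertainty estimates for the shared parameters:
perr = np.sqrt(np.diag(pcovBoth))  # standard errors for a, b, c, d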
I have a system of two first-order ODEs, which are nonlinear and hence difficult to solve analytically in closed form. I want to fit the numerical solution of this ODE system to a data set, but my data set covers only one of the two variables in the system. How do I go about this?
The related example I found didn't help because there's only one variable there.
My code which is currently leading to an error is:
import numpy as np
from scipy.integrate import odeint
from scipy.optimize import curve_fit
def f(y, t, a, b, g):
    S, I = y  # S, I are supposed to be my variables
    Sdot = -a * S * I
    Idot = (a - b) * S * I + (b - g - b * I) * I
    dydt = [Sdot, Idot]
    return dydt

def y(t, a, b, g, y0):
    y = odeint(f, y0, t, args=(a, b, g))
    return y.ravel()
I_data = []  # I have data only for I, not for S
file = open('./ratings_showdown.csv')
for e_raw in file.read().split('\r\n'):
    try:
        e = float(e_raw)
        I_data.append(e)
    except ValueError:
        continue
data_t = range(len(I_data))
popt, cov = curve_fit(y, data_t, I_data, [.05, 0.02, 0.01, [0.99,0.01]])
#want to fit I part of solution to data for variable I
#ERROR here, ValueError: setting an array element with a sequence
a_opt, b_opt, g_opt, y0_opt = popt
print("a = %g" % a_opt)
print("b = %g" % b_opt)
print("g = %g" % g_opt)
print("y0 = %g" % y0_opt)
import matplotlib.pyplot as plt
t = np.linspace(0, len(I_data), 2000)
plt.plot(data_t, I_data, '.',
         t, y(t, a_opt, b_opt, g_opt, y0_opt), '-')
plt.gcf().set_size_inches(6, 4)
#plt.savefig('out.png', dpi=96) #to save the fit result
plt.show()
This type of ODE fitting becomes a lot easier in symfit, which I wrote specifically as a user-friendly wrapper around scipy. I think it will be very useful for your situation, because the reduced amount of boilerplate code simplifies things a lot.
From the docs and applied roughly to your problem:
from symfit import variables, parameters, Fit, D, ODEModel
S, I, t = variables('S, I, t')
a, b, g = parameters('a, b, g')
model_dict = {
D(S, t): -a * S * I,
D(I, t): (a - b) * S * I + (b - g - b * I) * I
}
ode_model = ODEModel(model_dict, initial={t: 0.0, S: 0.99, I: 0.01})

# tdata here is the time axis from the question, e.g. tdata = np.array(data_t, dtype=float)
fit = Fit(ode_model, t=tdata, I=I_data, S=None)
fit_result = fit.execute()
Check out the docs for more :)
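A possible follow-up for plotting the fitted I(t), as a sketch mirroring the plotting pattern used in the symfit answer further above (the dense time axis is illustrative):
import numpy as np
import matplotlib.pyplot as plt

taxis = np.linspace(0, max(tdata), 500)
plt.plot(tdata, I_data, '.', label='data')
plt.plot(taxis, ode_model(t=taxis, **fit_result.params).I, label='I(t) fit')
plt.legend()
plt.show()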
So I figured out the problem.
The curve_fit() function apparently expects its initial parameter guess p0 to be a flat sequence of scalars; nesting a list inside it is what triggers the "setting an array element with a sequence" error. So instead of passing the initial conditions as a list [0.99, 0.01], I passed them separately as 0.99 and 0.01.
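A minimal sketch of that fix applied to the code above; the model also has to return only the I component so its length matches I_data (the parameter names for the split initial conditions are illustrative):
def y(t, a, b, g, S0, I0):
    # Rebuild the initial-condition vector from two scalar fit parameters
    sol = odeint(f, [S0, I0], t, args=(a, b, g))
    return sol[:, 1]  # return only I, since only I is measured

popt, cov = curve_fit(y, data_t, I_data, p0=[0.05, 0.02, 0.01, 0.99, 0.01])
a_opt, b_opt, g_opt, S0_opt, I0_opt = popt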