Plot a model with multiple curve_fit parameters - python

I have a model that describes a sum of Gaussian distributions:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

s1 = np.random.normal(2, 0.5, size=(1000, 1))
s2 = np.random.normal(5, 0.5, size=(1000, 1))
mb = (np.concatenate((s1, s2), axis=0)).max()
Xi = np.arange(0, mb, 0.1)  # bin edges

# histogram of population 1
Y11, bins1 = np.histogram(s1, Xi)
Y1 = Y11 / Y11.sum()
X1 = bins1[:-1]

# histogram of population 2
Y22, bins2 = np.histogram(s2, Xi)
Y2 = Y22 / Y22.sum()
X2 = bins2[:-1]

# universe, with all populations mixed
S = np.concatenate((s1, s2), axis=0)
Yi, bins = np.histogram(S, Xi)
Y = Yi / Yi.sum()
X = bins[:-1]

def gaussians(X, amp1, mean1, SD1, amp2, mean2, SD2):
    A = amp1 * np.exp(-0.5 * ((X - mean1) / SD1)**2)
    B = amp2 * np.exp(-0.5 * ((X - mean2) / SD2)**2)
    return A + B

params, pcov = curve_fit(gaussians, X, Y, p0=(1, 2, 1, 1, 5, 1), maxfev=4000)

j = np.arange(0.1, mb, 0.1)  # (unused)
plt.figure(figsize=(10, 6))  # size of graph
plt.plot(X, Y, 'o', linewidth=2)
plt.plot(X, gaussians(X, params[0], params[1], params[2], params[3], params[4], params[5]), 'b', linewidth=2)
plt.xlim([-.01, mb])
plt.ylim([0, 0.1])
plt.show()
This code plots a nice graph as follows:
I would like to know how to plot each Gaussian component separately, overlaid on the same graph, using the fitted parameters of my model function. I mean, something like this (made by hand):

For anyone looking for the answer, I figured out how to do it. You simply set to zero the parameters of the component you don't want to plot:
plt.plot(X, gaussians(X, params[0], params[1], params[2], params[3], params[4], params[5]), 'b', linewidth=8, alpha=0.1)
plt.plot(X, gaussians(X, 0, params[1], params[2], params[3], params[4], params[5]), 'r', linewidth=1)
plt.plot(X, gaussians(X, params[0], params[1], params[2], 0, params[4], params[5]), 'g', linewidth=1)
plt.xlim([-.01, mb])
plt.ylim([0, 0.1])
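A slightly cleaner alternative (a sketch, not from the original post) is to define a single-Gaussian helper and evaluate it once per fitted component; the helper name below is illustrative:
def gaussian(X, amp, mean, SD):
    # one Gaussian component, same form as the terms inside gaussians()
    return amp * np.exp(-0.5 * ((X - mean) / SD)**2)

amp1, mean1, SD1, amp2, mean2, SD2 = params
plt.plot(X, gaussians(X, *params), 'b', linewidth=8, alpha=0.1)  # full model
plt.plot(X, gaussian(X, amp1, mean1, SD1), 'r', linewidth=1)     # component 1
plt.plot(X, gaussian(X, amp2, mean2, SD2), 'g', linewidth=1)     # component 2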

Related

fit more than one lognormal data with python

I am not highly proficient at maths. What I'm trying to do is fit several populations that are supposed to be lognormally distributed. My code is the following:
from scipy.optimize import curve_fit
import numpy as np
import matplotlib.pyplot as plt

# Generation of 3 populations:
s1 = np.random.lognormal(2, 0.6, 1000)   # mu and sigma
s2 = np.random.lognormal(1.6, 0.3, 1000) # mu and sigma
s3 = np.random.lognormal(1.8, 0.5, 1000) # mu and sigma
mb = np.max([s1, s2, s3])
X = np.arange(1, mb, 1)

# histogram population 1
Y11, bins1 = np.histogram(s1, X)
Y1 = Y11 / Y11.sum()
X1 = bins1[:-1]
# histogram population 2
Y22, bins2 = np.histogram(s2, X)
Y2 = Y22 / Y22.sum()
X2 = bins2[:-1]
# histogram population 3
Y33, bins3 = np.histogram(s3, X)
Y3 = Y33 / Y33.sum()
X3 = bins3[:-1]
# universe, with all mixed populations
S = np.concatenate((s1, s2, s3), axis=None)
Yi, bins = np.histogram(S, X)
Y = Yi / Yi.sum()
X = bins[:-1]

def logN(x, mu, sigma):
    # lognormal probability density function
    return np.exp(-(np.log(x) - mu)**2 / (2 * sigma**2)) / (x * sigma * np.sqrt(2 * np.pi))

params, pcov = curve_fit(logN, X, Y, method="lm")
print(params)
plt.plot(X1, Y1, 'o')
plt.plot(X2, Y2, 'o')
plt.plot(X3, Y3, 'o')
plt.plot(X, Y, 'r-o')
plt.plot(X, logN(X, params[0], params[1]))
plt.show()
This code produces a graph from which I can get the global parameters mu and sigma. However, I'm confused about how to recover the parameters of each individual population from the mixed-population data. Any idea on how to handle this problem is welcome.
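One possible approach (a sketch, not from the original thread) is to fit the mixture histogram with a weighted sum of three lognormal components, analogous to the two-Gaussian model in the question above; the weights, start values, and maxfev below are illustrative assumptions:
def logN3(x, w1, mu1, s1, w2, mu2, s2, w3, mu3, s3):
    # weighted sum of three lognormal PDFs
    return w1 * logN(x, mu1, s1) + w2 * logN(x, mu2, s2) + w3 * logN(x, mu3, s3)

# start values roughly matching the generating parameters (illustrative)
p0 = (1, 2.0, 0.6, 1, 1.6, 0.3, 1, 1.8, 0.5)
params3, pcov3 = curve_fit(logN3, X, Y, p0=p0, maxfev=10000)
print(params3)
plt.plot(X, logN3(X, *params3))
plt.show()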

normal distribution curve doesn't fit well over histogram in subplots using matplotlib

I am using "plt.subplots(2, 2, sharex=True, sharey=True)" to draw a 2*2 subplots. Each subplot has two Y axis and contains normal distribution curve over a histogram. Noting I particularly set "sharex=True, sharey=True" here in order to make all subplots share the same X axis and Y axis.
After running my code, everything is fine except the second, three, and fourth subplots where the normal distribution curve doesn't fit the histogram very well (please see the figure here)
I did googling but failed to get this issue solved. However, if I set "sharex=True, sharey=False" in my code, then the figure looks correct, but all subplots use their own Y axix which isn't what I want. Please see the figure here
Hope this issue can be fixed by experts in StackOverflow. Many thanks in advance!
Below is my code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import norm

def align_yaxis(ax1, v1, ax2, v2):
    # adjust ax2 ylimit so that v2 in ax2 is aligned to v1 in ax1
    _, y1 = ax1.transData.transform((0, v1))
    _, y2 = ax2.transData.transform((0, v2))
    inv = ax2.transData.inverted()
    _, dy = inv.transform((0, 0)) - inv.transform((0, y1 - y2))
    miny, maxy = ax2.get_ylim()
    ax2.set_ylim(miny + dy, maxy + dy)

def drawSingle(myax, mydf, title, offset):
    num_bins = 200
    xs = mydf["gap"]
    x = np.linspace(-1, 1, 1000)
    mu = np.mean(x)
    sigma = np.std(xs)
    n, bins, patches = myax.hist(xs, num_bins, alpha=0.8, facecolor='blue', density=False)
    myax.set_ylabel('frequency', color="black", fontsize=12, weight="bold")
    myax.set_xlabel('X', fontsize=12, weight="bold", horizontalalignment='center')
    ax_twin = myax.twinx()
    y_normcurve = norm.pdf(bins, mu, sigma)
    ax_twin.plot(bins, y_normcurve, 'r--')
    align_yaxis(myax, 0, ax_twin, 0)
    peakpoint = norm.pdf(mu, loc=mu, scale=sigma)
    plt.vlines(mu, 0, peakpoint, 'y', '--', label='example')
    ax_twin.set_ylabel("probability density", color="black", fontsize=12, weight="bold")

def drawSubplots(mydf1, mydf2, mydf3, mydf4, pos1, pos2, pos3, pos4, title, filename):
    plt.rcParams['figure.figsize'] = (18, 15)
    my_x_ticks = np.arange(-0.8, 0.8, 0.1)
    rows, cols = 2, 2
    fig, ax = plt.subplots(2, 2, sharex=True, sharey=True)
    drawSingle(ax[0][0], mydf1, "Subplot1", pos1)
    drawSingle(ax[0][1], mydf2, "Subplot2", pos2)
    drawSingle(ax[1][0], mydf3, "Subplot3", pos3)
    drawSingle(ax[1][1], mydf4, "Subplot4", pos4)
    plt.text(-1, -1, title, horizontalalignment='center', fontsize=18)
    plt.show()

# df1..df4 are DataFrames with a "gap" column
drawSubplots(df1, df2, df3, df4, 3.2, 3.1, 2.7, 2.85, "test9", "test9")
Here is an attempt to:
have the left y-axes show "frequency" (which is rather uninformative given the current bin widths) and be shared among the 4 subplots
have the right y-axes show a "probability density"; note how the top of all Gaussians is around y=0.02 (the twin axes can only be set at the end, because the shared y-axes can still be updated by later subplots)
have the histogram and the normal curve aligned
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import norm

def drawSingle(myax, mydf, title):
    num_bins = 200
    xs = mydf["gap"]
    x = np.linspace(-1, 1, 1000)
    mu = np.mean(xs)
    sigma = np.std(xs)
    n, bins, patches = myax.hist(xs, num_bins, alpha=0.8, facecolor='blue', density=False)
    myax.set_ylabel('frequency', color="black", fontsize=12, weight="bold")
    myax.set_xlabel('X', fontsize=12, weight="bold", horizontalalignment='center')
    # with density=False, a bin holds roughly pdf(x) * len(xs) * bin_width counts
    normalization_factor = len(xs) * (bins[1] - bins[0])
    y_normcurve = norm.pdf(x, mu, sigma) * normalization_factor
    myax.plot(x, y_normcurve, 'r--')
    myax.vlines(mu, 0, y_normcurve.max(), color='lime', ls='--', label='example')
    return normalization_factor

def drawSubplots(mydf1, mydf2, mydf3, mydf4, title):
    plt.rcParams['figure.figsize'] = (18, 15)
    fig, ax = plt.subplots(nrows=2, ncols=2, sharex=True, sharey=True)
    dfs = [mydf1, mydf2, mydf3, mydf4]
    norm_factors = [drawSingle(ax_i, df, title)
                    for ax_i, df, title in zip(ax.ravel(), dfs, ["Subplot1", "Subplot2", "Subplot3", "Subplot4"])]
    for ax_i, norm_factor in zip(ax.ravel(), norm_factors):
        ax_twin = ax_i.twinx()
        ymax = ax_i.get_ylim()[1]
        ax_twin.set_ylim(0, ymax / norm_factor)
    plt.suptitle(title, fontsize=18)
    plt.tight_layout()
    plt.show()

df1, df2, df3, df4 = [pd.DataFrame({"gap": np.random.normal(0, 0.2, n)}) for n in [6000, 4000, 1800, 1200]]
drawSubplots(df1, df2, df3, df4, "Title")
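The key idea is that with density=False a histogram bin holds roughly len(xs) * bin_width * pdf(x) counts, so scaling the normal pdf by that factor lets it overlay the raw-count histogram. A quick sanity check (illustrative, not part of the original answer):
import numpy as np
from scipy.stats import norm

xs = np.random.normal(0, 0.2, 6000)
counts, bins = np.histogram(xs, bins=200)
bin_width = bins[1] - bins[0]
# expected count in a bin at the centre, according to the fitted normal pdf
expected_peak = norm.pdf(0, loc=xs.mean(), scale=xs.std()) * len(xs) * bin_width
print(counts.max(), round(expected_peak))  # both should be of the same order of magnitude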
Many thanks JohanC, you are amazing.
Based on your code, I just added a few lines within the drawSubplots function in order to shade 95% of the Gaussian curve area between the lower and upper bounds in each subplot. The following is my attempt. It seems that ax_twin.fill_between doesn't work as expected here: as you can see from the figure, the shaded area falls outside the Gaussian curve. What I want is to shade only the area under the Gaussian curve between the lower bound and the upper bound. If you don't mind, would you please check my mistake? Thank you very much!
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math
from scipy.stats import norm

def align_yaxis(ax1, v1, ax2, v2):
    # adjust ax2 ylimit so that v2 in ax2 is aligned to v1 in ax1
    _, y1 = ax1.transData.transform((0, v1))
    _, y2 = ax2.transData.transform((0, v2))
    inv = ax2.transData.inverted()
    _, dy = inv.transform((0, 0)) - inv.transform((0, y1 - y2))
    miny, maxy = ax2.get_ylim()
    ax2.set_ylim(miny + dy, maxy + dy)

def drawSingle(myax, mydf, title):
    num_bins = 200
    xs = mydf["gap"]
    x = np.linspace(-1, 1, 1000)
    mu = np.mean(xs)
    sigma = np.std(xs)
    n, bins, patches = myax.hist(xs, num_bins, alpha=0.8, facecolor='blue', density=False)
    myax.set_ylabel('Frequency', color="black", fontsize=12, weight="bold")
    myax.set_xlabel(title, fontsize=12, weight="bold", horizontalalignment='center')
    normalization_factor = len(xs) * (bins[1] - bins[0])
    y_normcurve = norm.pdf(x, mu, sigma) * normalization_factor
    myax.plot(x, y_normcurve, 'r--')
    myax.vlines(mu, 0, y_normcurve.max(), color='lime', ls='--', label='example')
    plt.xlim(-0.8, 0.8)
    my_x_ticks = np.arange(-0.8, 0.8, 0.1)
    plt.xticks(my_x_ticks)
    return normalization_factor, mu, sigma

def drawSubplots(mydf1, mydf2, mydf3, mydf4, title):
    plt.rcParams['figure.figsize'] = (18, 15)
    norm_factors = []
    mus = []
    sigmas = []
    my_x_ticks = np.arange(-0.8, 0.8, 0.1)
    rows, cols = 2, 2
    fig, ax = plt.subplots(nrows=rows, ncols=cols, sharex=True, sharey=True)
    dfs = [mydf1, mydf2, mydf3, mydf4]
    for ax_i, df, subtitle in zip(ax.ravel(), dfs, ["Subplot1", "Subplot2", "Subplot3", "Subplot4"]):
        norm_factor, mu, sigma = drawSingle(ax_i, df, subtitle)
        norm_factors.append(norm_factor)
        mus.append(mu)
        sigmas.append(sigma)
    for ax_i, norm_factor, mu, sigma in zip(ax.ravel(), norm_factors, mus, sigmas):
        ax_twin = ax_i.twinx()
        xmax = ax_i.get_xlim()[1]
        ax_twin.set_ylim(0, xmax / norm_factor)
        ax_twin.set_ylabel("probability density", color="black", fontsize=12, weight="bold")
        CI_95_lower = mu - (1.96 * sigma)
        CI_95_upper = mu + (1.96 * sigma)
        px_shaded = np.arange(CI_95_lower, CI_95_upper, 0.1)
        ax_twin.fill_between(px_shaded, norm.pdf(px_shaded, loc=mu, scale=sigma) * norm_factor, alpha=0.75, color='pink')
        area_shaded_95_CI = norm.cdf(x=CI_95_upper, loc=mu, scale=sigma) - norm.cdf(x=CI_95_lower, loc=mu, scale=sigma)
        ax_twin.text(-0.06, 0.01, str(round(area_shaded_95_CI * 100, 1)) + "%", fontsize=20)
        ax_twin.annotate(text=f'lower bound= {CI_95_lower:.3f}', xy=(CI_95_lower, norm.pdf(CI_95_lower, loc=mu, scale=sigma)), xytext=(-0.75, 0.01), weight='bold', color='blue',
                         arrowprops=dict(arrowstyle='-|>', connectionstyle='arc3', color='green'),
                         fontsize=12)
        ax_twin.annotate(text=f'upper bound= {CI_95_upper:.3f}', xy=(CI_95_upper, norm.pdf(CI_95_upper, loc=mu, scale=sigma)), xytext=(0.28, 0.01), weight='bold', color='blue',
                         arrowprops=dict(arrowstyle='-|>', connectionstyle='arc3', color='green'),
                         fontsize=12)
        ax_twin.text(0.05, 0.03, r"$\mu=" + f'{mu:.6f}' + r", \sigma=" + f'{sigma:.6f}' + "$" + ", confidence interval=95%",
                     horizontalalignment='center', fontsize=15)
    plt.suptitle(title, fontsize=18)
    plt.tight_layout()
    plt.show()

df1, df2, df3, df4 = [pd.DataFrame({"gap": np.random.normal(0, 0.2, n)}) for n in [6000, 4000, 1800, 1200]]
drawSubplots(df1, df2, df3, df4, "Title")
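A possible fix (a sketch, not from the original thread): the shading is drawn on the twin axis, which is in density units, so the pdf values should not be multiplied by norm_factor there, and the twin y-limit should come from the histogram's y-limit rather than its x-limit. Inside the second loop, reusing mu, sigma and norm_factor from above:
        ax_twin = ax_i.twinx()
        ymax = ax_i.get_ylim()[1]
        ax_twin.set_ylim(0, ymax / norm_factor)  # density axis matching the count axis
        CI_95_lower = mu - (1.96 * sigma)
        CI_95_upper = mu + (1.96 * sigma)
        px_shaded = np.linspace(CI_95_lower, CI_95_upper, 200)
        # plain pdf values, since ax_twin is already scaled in density units
        ax_twin.fill_between(px_shaded, norm.pdf(px_shaded, loc=mu, scale=sigma),
                             alpha=0.75, color='pink')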

Fit 3d coordinates into a parabola

I would like to predict a ball trajectory by fitting its 3D coordinates to a parabola. Below is my code, but instead of a parabola I get a straight line. If you have any clue about it, please let me know. Thanks!
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

# draw scatter coordinates (rm_list holds the 3D points)
fig = plt.figure()
ax = plt.axes(projection='3d')
x_list = []
y_list = []
z_list = []
for x in rm_list:
    x_list.append(x[0][0])
    y_list.append(x[0][1])
    z_list.append(x[0][2])
x = np.array(x_list)
y = np.array(y_list)
z = np.array(z_list)
ax.scatter(x, y, z)

# curve fit
def func(x, a, b, c, d):
    return a * x[0]**2 + b * x[1]**2 + c * x[0] * x[1] + d

data = np.column_stack([x_list, y_list, z_list])
popt, _ = curve_fit(func, data[:, :2].T, ydata=data[:, 2])
a, b, c, d = popt
print('z = %.5f * x^2 + %.5f * y^2 + %.5f * x * y + %.5f' % (a, b, c, d))
x1 = np.linspace(0.3, 0.4, 100)
y1 = np.linspace(0.02, 0.06, 100)
z1 = a * x1**2 + b * y1**2 + c * x1 * y1 + d
ax.plot(x1, y1, z1, color='green')
plt.show()
Update 1
After changing func to a*x^2 + b*y^2 + c*x*y + d*x + e*y + f, I got a parabola, but it does not fit the coordinates.
The fact that you have the underlying timestamp data makes the fitting procedure easier:
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit
from numpy.polynomial import Polynomial

# test data generation with some noise
# here read in your data
np.random.seed(123)
n = 40
x_param = [ 1, 21, -1]
y_param = [12, -3,  0]
z_param = [-3,  0, -2]
px = Polynomial(x_param)
py = Polynomial(y_param)
pz = Polynomial(z_param)
t = np.random.choice(np.linspace(-3000, 2000, 1000)/500, n)
x = px(t) + np.random.random(n)
y = py(t) + np.random.random(n)
z = pz(t) + np.random.random(n)

# here start the real calculations
# draw scatter coordinates of raw data
fig = plt.figure()
ax = plt.axes(projection='3d')
ax.scatter(x, y, z, label="raw data")

# curve fit function
def func(t, x2, x1, x0, y2, y1, y0, z2, z1, z0):
    Px = Polynomial([x2, x1, x0])
    Py = Polynomial([y2, y1, y0])
    Pz = Polynomial([z2, z1, z0])
    return np.concatenate([Px(t), Py(t), Pz(t)])

# curve fit
# start values are not necessary for this example
# but make it your rule to always provide start values for curve_fit
start_vals = [ 1, 10,  1,
              10,  1,  1,
              -1, -1, -1]
xyz = np.concatenate([x, y, z])
popt, _ = curve_fit(func, t, xyz, p0=start_vals)
print(popt)
#[ 1.58003630e+00  2.10059868e+01 -1.00401965e+00
#  1.25895591e+01 -2.97374035e+00 -3.23358241e-03
# -2.44293562e+00  3.96407428e-02 -1.99671092e+00]

# regularly spaced fit data
t_fit = np.linspace(min(t), max(t), 100)
xyz_fit = func(t_fit, *popt).reshape(3, -1)
ax.plot(xyz_fit[0, :], xyz_fit[1, :], xyz_fit[2, :], color="green", label="fitted data")
ax.legend()
plt.show()
Sample output:
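Since each coordinate is an independent quadratic in t, np.polyfit per coordinate is a simpler alternative to the combined curve_fit call (a sketch reusing t, x, y, z and ax from above):
# fit each coordinate separately as a degree-2 polynomial in t
coeffs_x = np.polyfit(t, x, 2)   # highest-degree coefficient first
coeffs_y = np.polyfit(t, y, 2)
coeffs_z = np.polyfit(t, z, 2)
t_fit = np.linspace(t.min(), t.max(), 100)
ax.plot(np.polyval(coeffs_x, t_fit),
        np.polyval(coeffs_y, t_fit),
        np.polyval(coeffs_z, t_fit),
        color="orange", label="per-coordinate polyfit")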

GP regression using Poisson likelihood

I am trying to implement GP regression using Poisson likelihood.
I followed the example in GPy by doing
poisson_likelihood = GPy.likelihoods.Poisson()
laplace_inf = GPy.inference.latent_function_inference.Laplace()
m = GPy.core.GP(X=X, Y=Y, likelihood=poisson_likelihood, inference_method=laplace_inf, kernel=kernel)
m.optimize()
# for plotting
pred_points = np.linspace(300,800,1000)[:, None]
#Predictive GP for log intensity mean and variance
f_mean, f_var = m._raw_predict(pred_points)
f_upper, f_lower = f_mean + 2*np.sqrt(f_var), f_mean - 2.*np.sqrt(f_var)
pb.figure(figsize=(10, 13))
pb.plot(pred_points, np.exp(f_mean), color='blue', lw=2)
pb.fill_between(pred_points[:,0], np.exp(f_lower[:,0]), np.exp(f_upper[:,0]), color='blue', alpha=.1)
pb.errorbar(Xc.flatten(), Yc.flatten(), dyc, fmt='.', color='k',markersize=8,alpha=1.0, label='Data')
When I tried to do the same using GPflow, I implemented it in the following way:
poisson_likelihood = gpflow.likelihoods.Poisson()
m = gpflow.models.VGP((X, Y), kernel=k, likelihood=poisson_likelihood, num_latent_gps=1)
opt = gpflow.optimizers.Scipy()
opt_logs = opt.minimize(m.training_loss, m.trainable_variables, options=dict(maxiter=100))
# for plotting
xx = np.linspace(300, 800, 100).reshape(100, 1)
mean, var = m.predict_f(xx)
plt.plot(X, Y, "kx", mew=2)
plt.plot(xx, np.exp(mean), "C0", lw=2)
plt.fill_between(
xx[:, 0],
np.exp(mean[:, 0] - 1.96 * np.sqrt(var[:, 0])),
np.exp(mean[:, 0] + 1.96 * np.sqrt(var[:, 0])),
color="C0",
alpha=0.2,
)
When I implemented this using GPflow, the hyperparameters did not move from their initialized values.
Also, I am getting very different results; am I doing something wrong?
Result with GPflow
Result with GPy
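One way to check whether the GPflow hyperparameters are being trained at all (a diagnostic sketch, not from the original post; the maxiter value is illustrative) is to print the model summary before and after optimization and inspect the optimizer result:
from gpflow.utilities import print_summary

print_summary(m)  # parameter values before optimization
opt_logs = opt.minimize(m.training_loss, m.trainable_variables,
                        options=dict(maxiter=1000))
print(opt_logs)   # scipy OptimizeResult: check success, status and number of iterations
print_summary(m)  # parameter values after optimization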

How to fit part of a Cosine curve to data in Python?

I wrote this code to try to fit a graph of y = a(1 + cos(bx - pi)) + c to our collected data, but when using np.cos it tries to fit an entire cosine cycle onto the data, which doesn't match our results. Any help on how to fit only a section of the curve to our data would be fab!
I tried to avoid using cos by using a Maclaurin series expansion, but this still doesn't work.
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize

# w, mean and error are the measured data
x_data = w
y_data = mean
e = error

def test_func(x, a, b, c):
    y = (a/2)*(1 + (1 - (1/2)*(b*x - np.pi)**2 + (1/24)*(b*x - np.pi)**4)) + c
    return y

params, params_covariance = optimize.curve_fit(test_func, x_data, y_data)
print(params)
a = params[0]
b = params[1]
c = params[2]

plt.figure(num=None, figsize=(12, 6), dpi=80, facecolor='w', edgecolor='k')
plt.errorbar(x_data, y_data, yerr=e, fmt='o', marker='o', label='Data', markersize=3, color='k', elinewidth=1, capsize=2, markeredgewidth=1)
plt.plot(x_data, test_func(x_data, params[0], params[1], params[2]), label='Fitted function')
plt.legend(loc='best')
plt.ylabel('Interference intensity, $I$')
plt.xlabel('Rotational velocity of interferometer, $w$')
plt.show()
Your question is "how to fit only a section of a curve to our data." This can be accomplished by defining a piecewise function and fitting a section of your data to each corresponding piece of the function. You need to define the cut-off values that separate the parts of your data and pick which function to fit to each part.
In order to fit a curve to only a section of the data, you need to pass only the portion of the data you want to fit to curve_fit. Here are working examples of fitting the data to both a Maclaurin series and a cosine function:
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize

# Generate sample data
np.random.seed(0)
x_data = np.linspace(-np.pi, 3*np.pi, 101)
y_data = np.cos(x_data) + np.random.rand(len(x_data))/4
idx = (x_data < 0) | (x_data > 2*np.pi)
y_data[idx] = 1 + np.random.rand(sum(idx))/4
e = np.random.rand(len(x_data))/10

# Select the part of the data to fit
fit_part = ~idx
x_data_to_fit = x_data[fit_part]
y_data_to_fit = y_data[fit_part]
Cosine Function:
def test_func(x, a, b):
    y = a*np.cos(b*x)
    return y
params, params_covariance = optimize.curve_fit(test_func, x_data_to_fit, y_data_to_fit)
print(params)
a = params[0]
b = params[1]
plt.figure(num=None, figsize=(12, 6), dpi=80, facecolor='w', edgecolor='k')
plt.title('Cosine Function Fit')
plt.errorbar(x_data, y_data, yerr=e, fmt='o', marker='o', label='Data', markersize=3, color='k', elinewidth=1, capsize=2, markeredgewidth=1)
plt.plot(x_data_to_fit, test_func(x_data_to_fit, a, b), label='Fitted function')
plt.legend(loc='best')
plt.ylabel('Interference intensity, $I$')
plt.xlabel('Rotational velocity of interferometer, $w$')
plt.show()
Maclaurin Series:
def test_func(x, a, b, c):
    y = (a/2)*(1 + (1 - (1/2)*(b*x - np.pi)**2 + (1/24)*(b*x - np.pi)**4)) + c
    return y
params, params_covariance = optimize.curve_fit(test_func, x_data_to_fit, y_data_to_fit)
print(params)
a = params[0]
b = params[1]
c = params[2]
plt.figure(num=None, figsize=(12, 6), dpi=80, facecolor='w', edgecolor='k')
plt.title('MacLaurin Series Fit')
plt.errorbar(x_data, y_data, yerr=e, fmt='o', marker='o', label='Data', markersize=3, color='k', elinewidth=1, capsize=2, markeredgewidth=1)
plt.plot(x_data_to_fit, test_func(x_data_to_fit, a, b, c), label='Fitted function')
plt.legend(loc='best')
plt.ylabel('Interference intensity, $I$')
plt.xlabel('Rotational velocity of interferometer, $w$')
plt.show()
The cosine function matches the data better than the Maclaurin series in this case because the data was generated using a cosine function.
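For completeness, the original three-parameter model from the question, y = a(1 + cos(bx - pi)) + c, can be fitted to just the selected section in the same way (a sketch reusing the sample data above; the start values are illustrative):
def original_model(x, a, b, c):
    # the model from the question, evaluated only on the selected section
    return a * (1 + np.cos(b*x - np.pi)) + c

params, params_covariance = optimize.curve_fit(original_model, x_data_to_fit, y_data_to_fit,
                                               p0=(-1.0, 1.0, 1.0))
print(params)
plt.plot(x_data_to_fit, original_model(x_data_to_fit, *params), label='Original model fit')
plt.legend(loc='best')
plt.show()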
