Is it possible to make a fit to Maxwell-Boltzmann like data in matplotlib or similar module in python?
scipy.stats has support for the maxwell distribution.
import scipy.stats as stats
import matplotlib.pyplot as plt
import numpy as np
maxwell = stats.maxwell
data = maxwell.rvs(loc=0, scale=5, size=10000)
params = maxwell.fit(data, floc=0)
print(params)
# (0, 4.9808603062591041)
plt.hist(data, bins=20, normed=True)
x = np.linspace(0, 25, 100)
plt.plot(x, maxwell.pdf(x, *params), lw=3)
plt.show()
The first parameter is the location or shift away from zero.
The second parameter is the scaling parameter, denoted by a on the wikipedia page.
To generate random variates (random data) with this distribution, use its rvs method:
newdata = maxwell.rvs(*params, size=100)
Related
from sklearn.mixture import GaussianMixture
from sklearn import preprocessing
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy import stats
from astropy.io import ascii
from scipy.stats import norm
data= pd.read_csv("P, Theta all stars.csv",usecols=[1])
names = data.columns
df = pd.DataFrame(data, columns=names)
df.head()
mu,std=norm.fit(df)
x=df['Theta']
num_bins = 20
n, bins, patches = plt.hist(x, num_bins, color ='green',alpha = 0.7)
p = norm.pdf(bins, mu, std)
plt.gcf().set_size_inches((10, 8))
plt.plot(bins, p, '--', color ='black')
plt.title("Mean: {:.3f} and Standard Deviation: {:.3f}".format(mu, std))
plt.xlabel('Theta(curve fit)')
#plt.savefig("{0}.png", dpi=300)
plt.show()
Gaussian Fit with density parameter
Gaussian Fit with no Density parameter
I intend to have a gaussian fit while keeping the original y axis without normalising it . But if I want that, the gaussian is getting flattened (because the data is not normalised). In short how can I make both the data and the gaussian to be unnormalised?
I have a set of data that follows a normal distribution in which I can fit the histogram and obtain the mean and sigma.
For the sake of example, I will approximate it by generating a random normal distribution as follows:
from scipy.stats import maxwell
import math
import random
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
from scipy.optimize import curve_fit
from IPython import embed # put embed() where you want to stop
import matplotlib.ticker as ticker
data = random.gauss(307, 16)
N, bins, patches = plt.hist(data, bins=40, density=True, alpha=0.5, histtype='bar', ec='black')
mu, std = norm.fit(data)
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, mu, std)
plt.plot(x, p, 'k', linewidth=2, label= r'$\mu$ = '+'{:0.1f}'.format(mu)+r' $\pm$ '+'{:0.1f}'.format(std))
What I would like to do next is to generate a Maxwell distribution from this "normal" distribution and be able to fit
I have read scipy.stats.maxwell webpage and several other related questions but was not able to generate such a distribution from "a gauss distribution" and fit it. Any help would much appreciate it.
Well, knowing that each Maxwell is distribution of the absolute value of the molecule velocity, where each component is normally distributed, you could make sampling like code below
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import maxwell
def maxw(size = None):
"""Generates size samples of maxwell"""
vx = np.random.normal(size=size)
vy = np.random.normal(size=size)
vz = np.random.normal(size=size)
return np.sqrt(vx*vx + vy*vy + vz*vz)
mdata = maxw(100000)
h, bins = np.histogram(mdata, bins = 101, range=(0.0, 10.0))
x = np.linspace(0.0, 10.0, 100)
rv = maxwell()
fig, ax = plt.subplots(1, 1)
ax.hist(mdata, bins = bins, density=True)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='Maxwell pdf')
plt.title("Maxwell")
plt.show()
And here is the picture with sampling and Maxwell PDF overlapped
In Python, I have estimated the parameters for the density of a model of my distribution and I would like to plot the density function above the histogram of the distribution. In R it is similar to using the option prop=TRUE.
import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
# initialization of the list "data"
# estimation of the parameter, in my case, mean and variance of a normal distribution
plt.hist(data, bins="auto") # data is the list of data
# here I would like to draw the density above the histogram
plt.show()
I guess the trickiest part is to make it fit.
Edit: I have tried this according to the first answer:
mean = np.mean(logdata)
var = np.var(logdata)
std = np.sqrt(var) # standard deviation, used by numpy as a replacement of the variance
plt.hist(logdata, bins="auto", alpha=0.5, label="données empiriques")
x = np.linspace(min(logdata), max(logdata), 100)
plt.plot(x, mlab.normpdf(x, mean, std))
plt.xlabel("log(taille des fichiers)")
plt.ylabel("nombre de fichiers")
plt.legend(loc='upper right')
plt.grid(True)
plt.show()
But it doesn't fit the graph, here is how it looks:
** Edit 2 ** Works with the option normed=True in the histogram function.
If I understand you correctly you have the mean and standard deviation of some data. You have plotted a histogram of this and would like to plot the normal distribution line over the histogram. This line can be generated using matplotlib.mlab.normpdf(), the documentation can be found here.
import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
mean = 100
sigma = 5
data = np.random.normal(mean,sigma,1000) # generate fake data
x = np.linspace(min(data), max(data), 100)
plt.hist(data, bins="auto",normed=True)
plt.plot(x, mlab.normpdf(x, mean, sigma))
plt.show()
Which gives the following figure:
Edit: The above only works with normed = True. If this is not an option, we can define our own function:
def gauss_function(x, a, x0, sigma):
return a * np.exp(-(x - x0) ** 2 / (2 * sigma ** 2))
mean = 100
sigma = 5
data = np.random.normal(mean,sigma,1000) # generate fake data
x = np.linspace(min(data), max(data), 1000)
test = gauss_function(x, max(data), mean, sigma)
plt.hist(data, bins="auto")
plt.plot(x, test)
plt.show()
All what you are looking for, already are in seaborn.
You just have to use distplot
import seaborn as sns
import numpy as np
data = np.random.normal(5, 2, size=1000)
sns.distplot(data)
Following is the code I wrote for Weibull Distribution which generates data which will fit a Weibull distribution and try to plot the same
from scipy import stats
import numpy as np
import matplotlib.pyplot as plt
data = stats.exponweib.rvs(a=1, c=2.09, scale=10.895, loc=0, size=2500)
plt.plot(data, stats.exponweib.pdf(data, *stats.exponweib.fit(data, 1, 1, scale=02, loc=0))
_ = plt.hist(data, bins = np.linspace(0, 16, 33), normed=True, alpha=0.5)
plt.show()
My Question :-
I want a single line interpolation over the bins, why I am getting messed up plot ?
The matplotlib plot function plots the curve connecting the points in the order in which the points are given. To get the curve that you expect, sort data before plotting it. E.g.:
data = stats.exponweib.rvs(a=1, c=2.09, scale=10.895, loc=0, size=2500)
data.sort()
plt.plot(data, stats.exponweib.pdf(data, *stats.exponweib.fit(data, 1, 1, scale=2, loc=0)))
I'm trying to do a little bit of distribution plotting and fitting in Python using SciPy for stats and matplotlib for the plotting. I'm having good luck with some things like creating a histogram:
seed(2)
alpha=5
loc=100
beta=22
data=ss.gamma.rvs(alpha,loc=loc,scale=beta,size=5000)
myHist = hist(data, 100, normed=True)
Brilliant!
I can even take the same gamma parameters and plot the line function of the probability distribution function (after some googling):
rv = ss.gamma(5,100,22)
x = np.linspace(0,600)
h = plt.plot(x, rv.pdf(x))
How would I go about plotting the histogram myHist with the PDF line h superimposed on top of the histogram? I'm hoping this is trivial, but I have been unable to figure it out.
just put both pieces together.
import scipy.stats as ss
import numpy as np
import matplotlib.pyplot as plt
alpha, loc, beta=5, 100, 22
data=ss.gamma.rvs(alpha,loc=loc,scale=beta,size=5000)
myHist = plt.hist(data, 100, normed=True)
rv = ss.gamma(alpha,loc,beta)
x = np.linspace(0,600)
h = plt.plot(x, rv.pdf(x), lw=2)
plt.show()
to make sure you get what you want in any specific plot instance, try to create a figure object first
import scipy.stats as ss
import numpy as np
import matplotlib.pyplot as plt
# setting up the axes
fig = plt.figure(figsize=(8,8))
ax = fig.add_subplot(111)
# now plot
alpha, loc, beta=5, 100, 22
data=ss.gamma.rvs(alpha,loc=loc,scale=beta,size=5000)
myHist = ax.hist(data, 100, normed=True)
rv = ss.gamma(alpha,loc,beta)
x = np.linspace(0,600)
h = ax.plot(x, rv.pdf(x), lw=2)
# show
plt.show()
One could be interested in plotting the distibution function of any histogram.
This can be done using seaborn kde function
import numpy as np # for random data
import pandas as pd # for convinience
import matplotlib.pyplot as plt # for graphics
import seaborn as sns # for nicer graphics
v1 = pd.Series(np.random.normal(0,10,1000), name='v1')
v2 = pd.Series(2*v1 + np.random.normal(60,15,1000), name='v2')
# plot a kernel density estimation over a stacked barchart
plt.figure()
plt.hist([v1, v2], histtype='barstacked', normed=True);
v3 = np.concatenate((v1,v2))
sns.kdeplot(v3);
plt.show()
from a coursera course on data visualization with python
Expanding on Malik's answer, and trying to stick with vanilla NumPy, SciPy and Matplotlib. I've pulled in Seaborn, but it's only used to provide nicer defaults and small visual tweaks:
import numpy as np
import scipy.stats as sps
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style='ticks')
# parameterise our distributions
d1 = sps.norm(0, 10)
d2 = sps.norm(60, 15)
# sample values from above distributions
y1 = d1.rvs(300)
y2 = d2.rvs(200)
# combine mixture
ys = np.concatenate([y1, y2])
# create new figure with size given explicitly
plt.figure(figsize=(10, 6))
# add histogram showing individual components
plt.hist([y1, y2], 31, histtype='barstacked', density=True, alpha=0.4, edgecolor='none')
# get X limits and fix them
mn, mx = plt.xlim()
plt.xlim(mn, mx)
# add our distributions to figure
x = np.linspace(mn, mx, 301)
plt.plot(x, d1.pdf(x) * (len(y1) / len(ys)), color='C0', ls='--', label='d1')
plt.plot(x, d2.pdf(x) * (len(y2) / len(ys)), color='C1', ls='--', label='d2')
# estimate Kernel Density and plot
kde = sps.gaussian_kde(ys)
plt.plot(x, kde.pdf(x), label='KDE')
# finish up
plt.legend()
plt.ylabel('Probability density')
sns.despine()
gives us the following plot:
I've tried to stick with a minimal feature set while producing relatively nice output, notably using SciPy to estimate the KDE is very easy.