How to plot normal distribution - python

Given a mean and a variance is there a simple function call which will plot a normal distribution?

import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
import math
mu = 0
variance = 1
sigma = math.sqrt(variance)
x = np.linspace(mu - 3*sigma, mu + 3*sigma, 100)
plt.plot(x, stats.norm.pdf(x, mu, sigma))
plt.show()

I don't think there is a function that does all that in a single call. However you can find the Gaussian probability density function in scipy.stats.
So the simplest way I could come up with is:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
# Plot between -10 and 10 with .001 steps.
x_axis = np.arange(-10, 10, 0.001)
# Mean = 0, SD = 2.
plt.plot(x_axis, norm.pdf(x_axis,0,2))
plt.show()
Sources:
http://www.johndcook.com/distributions_scipy.html
http://docs.scipy.org/doc/scipy/reference/stats.html
http://telliott99.blogspot.com/2010/02/plotting-normal-distribution-with.html

Use seaborn instead
i am using distplot of seaborn with mean=5 std=3 of 1000 values
value = np.random.normal(loc=5,scale=3,size=1000)
sns.distplot(value)
You will get a normal distribution curve

If you prefer to use a step by step approach you could consider a solution like follows
import numpy as np
import matplotlib.pyplot as plt
mean = 0; std = 1; variance = np.square(std)
x = np.arange(-5,5,.01)
f = np.exp(-np.square(x-mean)/2*variance)/(np.sqrt(2*np.pi*variance))
plt.plot(x,f)
plt.ylabel('gaussian distribution')
plt.show()

Unutbu answer is correct.
But because our mean can be more or less than zero I would still like to change this :
x = np.linspace(-3 * sigma, 3 * sigma, 100)
to this :
x = np.linspace(-3 * sigma + mean, 3 * sigma + mean, 100)

I believe that is important to set the height, so created this function:
def my_gauss(x, sigma=1, h=1, mid=0):
from math import exp, pow
variance = pow(sigma, 2)
return h * exp(-pow(x-mid, 2)/(2*variance))
Where sigma is the standard deviation, h is the height and mid is the mean.
To:
plt.close("all")
x = np.linspace(-20, 20, 101)
yg = [my_gauss(xi) for xi in x]
Here is the result using different heights and deviations:

I have just come back to this and I had to install scipy as matplotlib.mlab gave me the error message MatplotlibDeprecationWarning: scipy.stats.norm.pdf when trying example above. So the sample is now:
%matplotlib inline
import math
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats
mu = 0
variance = 1
sigma = math.sqrt(variance)
x = np.linspace(mu - 3*sigma, mu + 3*sigma, 100)
plt.plot(x, scipy.stats.norm.pdf(x, mu, sigma))
plt.show()

you can get cdf easily. so pdf via cdf
import numpy as np
import matplotlib.pyplot as plt
import scipy.interpolate
import scipy.stats
def setGridLine(ax):
#http://jonathansoma.com/lede/data-studio/matplotlib/adding-grid-lines-to-a-matplotlib-chart/
ax.set_axisbelow(True)
ax.minorticks_on()
ax.grid(which='major', linestyle='-', linewidth=0.5, color='grey')
ax.grid(which='minor', linestyle=':', linewidth=0.5, color='#a6a6a6')
ax.tick_params(which='both', # Options for both major and minor ticks
top=False, # turn off top ticks
left=False, # turn off left ticks
right=False, # turn off right ticks
bottom=False) # turn off bottom ticks
data1 = np.random.normal(0,1,1000000)
x=np.sort(data1)
y=np.arange(x.shape[0])/(x.shape[0]+1)
f2 = scipy.interpolate.interp1d(x, y,kind='linear')
x2 = np.linspace(x[0],x[-1],1001)
y2 = f2(x2)
y2b = np.diff(y2)/np.diff(x2)
x2b=(x2[1:]+x2[:-1])/2.
f3 = scipy.interpolate.interp1d(x, y,kind='cubic')
x3 = np.linspace(x[0],x[-1],1001)
y3 = f3(x3)
y3b = np.diff(y3)/np.diff(x3)
x3b=(x3[1:]+x3[:-1])/2.
bins=np.arange(-4,4,0.1)
bins_centers=0.5*(bins[1:]+bins[:-1])
cdf = scipy.stats.norm.cdf(bins_centers)
pdf = scipy.stats.norm.pdf(bins_centers)
plt.rcParams["font.size"] = 18
fig, ax = plt.subplots(3,1,figsize=(10,16))
ax[0].set_title("cdf")
ax[0].plot(x,y,label="data")
ax[0].plot(x2,y2,label="linear")
ax[0].plot(x3,y3,label="cubic")
ax[0].plot(bins_centers,cdf,label="ans")
ax[1].set_title("pdf:linear")
ax[1].plot(x2b,y2b,label="linear")
ax[1].plot(bins_centers,pdf,label="ans")
ax[2].set_title("pdf:cubic")
ax[2].plot(x3b,y3b,label="cubic")
ax[2].plot(bins_centers,pdf,label="ans")
for idx in range(3):
ax[idx].legend()
setGridLine(ax[idx])
plt.show()
plt.clf()
plt.close()

import math
import matplotlib.pyplot as plt
import numpy
import pandas as pd
def normal_pdf(x, mu=0, sigma=1):
sqrt_two_pi = math.sqrt(math.pi * 2)
return math.exp(-(x - mu) ** 2 / 2 / sigma ** 2) / (sqrt_two_pi * sigma)
df = pd.DataFrame({'x1': numpy.arange(-10, 10, 0.1), 'y1': map(normal_pdf, numpy.arange(-10, 10, 0.1))})
plt.plot('x1', 'y1', data=df, marker='o', markerfacecolor='blue', markersize=5, color='skyblue', linewidth=1)
plt.show()

For me, this worked pretty well if you are trying to plot a particular pdf
theta1 = {
"a": 0.5,
"cov" : 1,
"mean" : 0
}
x = np.linspace(start = 0, stop = 1000, num = 1000)
pdf = stats.norm.pdf(x, theta1['mean'], theta1['cov']) + theta2['a']
sns.lineplot(x,pdf)

Related

Why a norm distribution does not plot a line on stats.probplot()?

The problem is with the resultant graph of function scipy.stats.probplot().
Samples from a normal distribution doesn't produce a line as expected.
I am trying to normalize some data using graphs as guidance.
However, after some strange results showing that zscore and log transformations were having no effect, I started looking for something wrong.
So, I built a graph using synthetic values that has a norm distribution and the resultant graph seems very awkward.
Here is the steps to reproduce the array and the graph:
import math
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
mu = 0
variance = 1
sigma = math.sqrt(variance)
x = np.linspace(mu - 3*sigma, mu + 3*sigma, 100)
norm = stats.norm.pdf(x, mu, sigma)
plt.plot(x, norm)
plt.show()
_ = stats.probplot(norm, plot=plt, sparams=(0, 1))
plt.show()
Distribution curve:
Probability plot:
Your synthesized data aren't normally distributed, they are uniformly distributed, this is what numpy.linspace() does. You can visualize this by adding seaborn.distplot(x, fit=scipy.stats.norm).
import math
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import seaborn as sns
mu = 0
variance = 1
sigma = math.sqrt(variance)
x = np.linspace(mu - 3*sigma, mu + 3*sigma, 100)
y = stats.norm.pdf(x, mu, sigma)
sns.distplot(y, fit=stats.norm)
fig = plt.figure()
res = stats.probplot(y, plot=plt, sparams=(0, 1))
plt.show()
Try synthesizing your data with numpy.random.normal(). This will give you normally distributed data.
import math
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import seaborn as sns
mu = 0
variance = 1
sigma = math.sqrt(variance)
x = np.random.normal(loc=mu, scale=sigma, size=100)
sns.distplot(x, fit=stats.norm)
fig = plt.figure()
res = stats.probplot(x, plot=plt, sparams=(0, 1))
plt.show()

Difference between scipy.stats.norm.pdf and plotting gaussian manually

I'm plotting a simple normal distribution using scipy.stats, but for some reason when I try to compare it to the regular gaussian formula the plot looks very different:
import numpy as np
import scipy.stats as stats
x = np.linspace(-50,175,10000)
sig1, mu1 = 10.0, 30.0
y1 = stats.norm.pdf(x, mu1, sig1)
y11 = np.exp(-(x-mu1)**2/2*sig1)/(np.sqrt(2*np.pi*sig1))
plt.plot(x,y11)
plt.plot(x,y1)
The result is:
Can someone explain to me why they are not the same?
stats.norm.pdf requires sigma, but in your calculation you are using it as variance. Also there are two brackets missing.
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
x = np.linspace(-50, 175, 10000)
sig1, mu1 = 10.0, 30.0
var1 = sig1 ** 2
y1 = stats.norm.pdf(x, mu1, sig1)
y11 = np.exp(-((x - mu1) ** 2) / (2 * var1)) / (np.sqrt(2 * np.pi * var1))
plt.plot(x, y11)
plt.plot(x, y1)
plt.show()
Which produces the same plot.
Cheers!
First of all, you have a mistake. Parentheses are missing for the denominator in the exponential of the manual gaussian.
Furthermore, the third argument of scipy.norm.pdf is the standard deviation (sigma), but that appears in the gaussian PDF squared (sigma^2).
The corrected code would be
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
x = np.linspace(-50,175,10000)
sig1, mu1 = 10.0, 30.0
y1 = stats.norm.pdf(x, mu1, sig1)
y11 = np.exp(-(x-mu1)**2/(2*sig1**2))/(np.sqrt(2*np.pi*sig1**2))
plt.plot(x,y1,label="scipy",alpha=0.85)
plt.plot(x,y11,ls="--",label="custom")
plt.legend()
plt.grid()
Which outputs:

Draw the density curve exactly on the Histogram without normalizing

I need to draw the density curve on the Histogram with the actual height of the bars (actual frequency) as the y-axis.
Try1:
I found a related answer here but, it has normalized the Histogram to the range of the curve.
Below is my code and the output.
import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
from scipy.stats import norm
data = [125.36, 126.66, 130.28, 133.74, 126.92, 120.85, 119.42, 128.61, 123.53, 130.15, 126.02, 116.65, 125.24, 126.84,
125.95, 114.41, 138.62, 127.4, 127.59, 123.57, 133.76, 124.6, 113.48, 128.6, 121.04, 119.42, 120.83, 136.53, 120.4,
136.58, 121.73, 132.72, 109.25, 125.42, 117.67, 124.01, 118.74, 128.99, 131.11, 112.27, 118.76, 119.15, 122.42,
122.22, 134.71, 126.22, 130.33, 120.52, 126.88, 117.4]
(mu, sigma) = norm.fit(data)
x = np.linspace(min(data), max(data), 100)
plt.hist(data, bins=12, normed=True)
plt.plot(x, mlab.normpdf(x, mu, sigma))
plt.show()
Try2:
There #DavidG has given an option, a user defined function even it doesn't cover the density of the Histogram accurately.
def gauss_function(x, a, x0, sigma):
return a * np.exp(-(x - x0) ** 2 / (2 * sigma ** 2))
test = gauss_function(x, max(data), mu, sigma)
plt.hist(data, bins=12)
plt.plot(x, test)
plt.show()
The result for this was,
But the actual Histogram is below, where Y-axis ranges from 0 to 8,
And I want to draw the density curve exactly on that. Any help this regards will be really appreciated.
Is this what you're looking for? I'm multiplying the pdf by the area of the histogram.
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
data = [125.36, 126.66, 130.28, 133.74, 126.92, 120.85, 119.42, 128.61, 123.53, 130.15, 126.02, 116.65, 125.24, 126.84,
125.95, 114.41, 138.62, 127.4, 127.59, 123.57, 133.76, 124.6, 113.48, 128.6, 121.04, 119.42, 120.83, 136.53, 120.4,
136.58, 121.73, 132.72, 109.25, 125.42, 117.67, 124.01, 118.74, 128.99, 131.11, 112.27, 118.76, 119.15, 122.42,
122.22, 134.71, 126.22, 130.33, 120.52, 126.88, 117.4]
(mu, sigma) = norm.fit(data)
x = np.linspace(min(data), max(data), 100)
values, bins, _ = plt.hist(data, bins=12)
area = sum(np.diff(bins) * values)
plt.plot(x, norm.pdf(x, mu, sigma) * area, 'r')
plt.show()
Result:

Python: scipy odeint stops integrating at maximum value and produces only half of intended plot

I am working on a piece of code which models the expansion of the universe, in particular I am integrating (using scipy odeint) the Friedmann equation to get a plot of the scale factor against time for different curvature. My code is:
import numpy as np
import matplotlib.pyplot as plt
import scipy as sp
from scipy.integrate import odeint
t_0 = 0.0004
a_0 = 0.001
omega_m = 1
omega_r = 0
omega_lambda = 0
omega_k = 1 - omega_lambda - omega_m - omega_r
H_0 = 1./13.799
def Friedmann(a, t):
dadt = H_0 * (((omega_m) * a**(-1)) + ((omega_r) * a**(-2)) + ((omega_lambda) * a**2) + (omega_k))**(1./2.)
return dadt
plt.rc('text', usetex=True)
plt.rc('font', family='serif')
fig, ax = plt.subplots(nrows=1,ncols=1)
for omega_k in np.linspace(-1, 1, 3):
t = np.linspace(t_0,50,200)
a = odeint(Friedmann, a_0, t)
a = np.array(a).flatten()
ax.plot(t,a)
line1, = plt.plot(t, a, 'b')
plt.xlabel('Time/Gyr')
plt.ylabel('Scale factor a')
plt.grid(True)
plt.axis([0,50,0,5])
plt.show()
This is the plot I get. I am trying to get a plot where the bottom (light blue) line loops back down and meets the x-axis at t ~ 43 Gyr but I can't seem to get this (maybe something to do with the integration).
Any help is much appreciated.

How can I shade an area under a curve between two lines in matplotlib / pandas?

I'm trying to recreate this chart, more or less, using matplotlib:
Except in mine the center is 100 and the standard deviation is 16 (like IQ). This is what I have so far:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats as stats
x = np.linspace(50,150,100)
iq = stats.norm.pdf(x, 100, 16)
plt.plot(x,iq)
That generates a normal curve like this:
So far so good. But I'm at a loss for how to shade areas under the curve.
You can use plt.fill_between:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
plt.style.use('ggplot')
mean = 100
std = 16
x = np.linspace(mean - 5 * std, mean + 5 * std, 1000)
iq = stats.norm(mean, std)
plt.plot(x, iq.pdf(x), 'r-', lw=3)
colors = ['c', 'r', 'b', 'g', ]
colors = colors + list(reversed(colors))
for i, color in zip(range(-4, 4), colors):
low = mean + i * std
high = mean + (i + 1) * std
px = x[np.logical_and(x >= low, x <= high)]
plt.fill_between(
px,
iq.pdf(px),
color=color,
alpha=0.5,
linewidth=0,
)
plt.tight_layout()
plt.savefig('test.png', dpi=300)

Categories

Resources