skew normal distribution in scipy - python

Does anyone know how to plot a skew normal distribution with scipy?
I suppose that the stats.norm class can be used, but I just can't figure out how.
Furthermore, how can I estimate the parameters describing the skew normal distribution of a unidimensional dataset?

From the Wikipedia description,
from scipy import linspace
from scipy import pi,sqrt,exp
from scipy.special import erf
from pylab import plot,show
def pdf(x):
    """Return the standard normal probability density at ``x``.

    ``x`` may be a scalar or a NumPy array; the result has the same shape.
    """
    # Divide by 2.0 (not 2) so integer input is never floor-divided on Python 2.
    return 1 / sqrt(2 * pi) * exp(-x**2 / 2.0)
def cdf(x):
    """Return the standard normal cumulative distribution at ``x``.

    Computed from the error function as ``(1 + erf(x / sqrt(2))) / 2``.
    """
    return (1 + erf(x / sqrt(2))) / 2
def skew(x, e=0, w=1, a=0):
    """Return the skew-normal probability density at ``x``.

    Parameters
    ----------
    x : scalar or array
        Point(s) at which to evaluate the density.
    e : location.
    w : scale.
    a : shape; with ``a=0`` the ``cdf`` factor is 0.5 and the result reduces
        to ``pdf(t) / w``.
    """
    # Force float arithmetic so integer e/w/x are not floor-divided on Python 2.
    t = (x - e) / (1.0 * w)
    # You can of course use the scipy.stats.norm versions
    # return 2.0 / w * norm.pdf(t) * norm.cdf(a*t)
    return 2.0 / w * pdf(t) * cdf(a * t)
n = 2**10  # number of sample points
e = 1.0  # location
w = 2.0  # scale
x = linspace(-10, 10, n)
# Draw one density curve for each integer shape parameter from -3 to 3.
for a in range(-3, 4):
    p = skew(x, e, w, a)
    plot(x, p)
# Show the figure once, after every curve has been added.
show()
If you want to find the scale, location, and shape parameters from a dataset use scipy.optimize.leastsq, for example using e=1.0,w=2.0 and a=1.0,
from scipy.optimize import leastsq
from scipy.stats import norm

# The plotting loop leaves ``a`` at 3; reset it to the shape value this
# example is described with.
a = 1.0
fzz = skew(x, e, w, a) + norm.rvs(0, 0.04, size=n)  # fuzzy data


def optm(l, x):
    """Return the residuals between the skew-normal model and ``fzz``.

    ``l`` holds the trial parameters in the order (location, scale, shape).
    """
    return skew(x, l[0], l[1], l[2]) - fzz


# print(...) with a single argument works on both Python 2 and Python 3.
print(leastsq(optm, [0.5, 0.5, 0.5], (x,)))
should give you something like,
(array([ 1.05206154, 1.96929465, 0.94590444]), 1)

The accepted answer is more or less outdated, because a skewnorm function is now implemented in scipy. So the code can be written a lot shorter:
from scipy.stats import skewnorm
import numpy as np
from matplotlib import pyplot as plt
# `your_data` is a placeholder that this snippet does not define; supply your own sample.
# Evaluation grid spanning the observed range of the data.
X = np.linspace(min(your_data), max(your_data))
# The tuple returned by skewnorm.fit is unpacked straight into skewnorm.pdf.
plt.plot(X, skewnorm.pdf(X, *skewnorm.fit(your_data)))

Related

Simultaneous numerical fit of two equations using Numpy least square method

I am trying to fit below mentioned two equations using python leastsq method but am not sure whether this is the right approach. First equation has incomplete gamma function in it while the second one is slightly complex, and along with an exponential function contains a term which is obtained by using a separate fitting formula.
J_mg = T_incomplete(hw/T_mag)
J_nmg = e^(-hw/T)*g(w,T)
Here g is a function of w and T and is calculated using a given fitting formula.
I am following the steps outlined in this question.
Here is what I have done
import numpy as np
from scipy.optimize import leastsq
from scipy.special import gammaincc
from scipy.special import gamma
from matplotlib.pyplot import plot
# generating data
# Every array below holds NPTS evenly spaced samples.
NPTS = 10
hw = np.linspace(0.5, 10, NPTS)  # passed as `hw` to both model functions
j1 = np.linspace(0.001,10,NPTS)  # target compared against j_w_nonmag in residual_func
j2 = np.linspace(0.003,10,NPTS)  # target compared against j_w_mag in residual_func
T_mag = np.linspace(0.3,0.5,NPTS)  # held fixed: handed to leastsq through `args`
#defining functions
def calc_gaunt_factor(hw, T, fitting_coeff=None):
    """Evaluate the Gaunt factor from an 11 x 11 table of fit coefficients.

    The value is ``sum(c[i][j] * A**i * B**j for i, j in 0..10)`` with
    ``A = 0.4 * (log10((0.0136 / T)**2) + 0.5)`` and
    ``B = 0.4 * (log10(hw / T) + 1.5)``.

    Parameters
    ----------
    hw, T : scalar or array
        Inputs to the fit (the original comment states T is in keV).
    fitting_coeff : array-like, optional
        Coefficient table indexed as ``[i][j]``.  When omitted it is read from
        ``fitting_coeff.txt`` (first row skipped).  Pass a preloaded table to
        avoid re-reading the file on every call inside a fit.
    """
    if fitting_coeff is None:
        fitting_coeff = np.loadtxt('fitting_coeff.txt', skiprows=1)
    coeff = np.asarray(fitting_coeff)
    #T is in KeV
    #K_b = 8.6173303(50)e−5 ev/K
    # Named `gamma_ratio` so the imported scipy.special.gamma is not shadowed.
    gamma_ratio = 0.0136 / T
    theta = hw / T
    A = (np.log10(gamma_ratio**2) + 0.5) * 0.4
    B = (np.log10(theta) + 1.5) * 0.4
    g = 0
    for i in range(11):
        for j in range(11):
            g = coeff[i][j] * (A**i) * (B**j) + g
    return g
def j_w_mag(hw, T_mag):
    """Return ``sqrt(1 / T_mag) * Gamma(order, hw / T_mag)`` with ``order = 0.001``.

    ``gammaincc`` is the regularized upper incomplete gamma function, so it is
    multiplied by ``gamma(order)`` to undo the normalization.
    """
    order = 0.001
    # 1.0 keeps the reciprocal a float even for integer T_mag on Python 2.
    return np.sqrt(1.0 / T_mag) * gamma(order) * gammaincc(order, hw / T_mag)
def j_w_nonmag(hw, T):
    """Return ``sqrt(1 / T) * exp(-hw / T) * calc_gaunt_factor(hw, T)``."""
    # The previously computed `gamma` and `theta` locals were never used here;
    # calc_gaunt_factor derives them itself.
    return np.sqrt(1.0 / T) * np.exp(-hw / T) * calc_gaunt_factor(hw, T)
def residual_func(T, T_mag, hw, j1, j2):
    """Return the stacked residuals of both models for ``leastsq``.

    ``T`` is the fit parameter; the remaining arguments arrive through
    ``args``.  NaNs in either residual are replaced via ``np.nan_to_num``.

    Note that the second half (``j2 - j_w_mag(hw, T_mag)``) does not depend on
    ``T``, so it adds only a constant to the sum of squares.
    """
    err_unmag = np.nan_to_num(j1 - j_w_nonmag(hw, T))
    err_mag = np.nan_to_num(j2 - j_w_mag(hw, T_mag))
    return np.concatenate((err_unmag, err_mag))
# Single starting guess for T; full_output=True makes leastsq return five values.
par_init = np.array([0.35])
best, cov, info, message, ler = leastsq(
    residual_func,
    par_init,
    args=(T_mag, hw, j1, j2),
    full_output=True,
)
print("Best-Fit Parameters:")
print("T=%s" % (best[0],))
I am getting weird value for my fitting parameter, T. Is this the right approach? Thanks.

Error in minimization using scipy

I am trying to minimize a function using SciPy, but the estimate of sigma is way off. Any help would be greatly appreciated.
Here's my code so far:-
import numpy as np
from scipy.optimize import minimize
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
from pylab import plot, show, grid, xlabel, ylabel
np.set_printoptions(linewidth=999999)
from scipy.stats import multivariate_normal
import scipy.stats as stats
from numpy import sqrt
from numpy import exp
from numpy import log
from numpy import pi
# Process parameters
beta = 1.5
sigma = 0.01
theta = 0.7
T = 50
N = 1000
dt = float(T) / N  # float() keeps the step non-zero under Python 2 integer division

M = np.zeros(N)
# The initial value must be in place BEFORE stepping; otherwise M[1] is built
# from a throw-away M[0] (and step 0 would read M[-1]).
M[0] = 0.7  ## initial value
decay = exp(-beta * dt)
# Per-step noise scale: sqrt((1 - exp(-2*beta*dt)) / (2*beta)).  The divisor is
# the whole product 2*beta; writing `/2*beta` multiplies by beta instead.
step_std = sqrt((1 - exp(-2 * beta * dt)) / (2 * beta))
# Iterate to compute the steps of the process, starting from the second sample.
for i in range(1, N):
    M[i] = (theta + (M[i - 1] - theta) * decay) + sigma * np.random.normal(0, step_std)
#print(M)
def mle(params, data=None, step=None):
    """Return the negative log-likelihood of the discretely observed process.

    Each transition ``x[k-1] -> x[k]`` is modelled as Gaussian with mean
    ``x[k-1]*alpha + theta*(1 - alpha)`` and variance ``eta``, where
    ``alpha = exp(-beta*step)`` and
    ``eta = sigma**2 * (1 - exp(-2*beta*step)) / (2*beta)``.

    Parameters
    ----------
    params : sequence
        ``(beta, theta, sigma)``.
    data : array-like, optional
        Observed series; defaults to the module-level ``M``.
    step : float, optional
        Time between observations; defaults to the module-level ``dt``.

    Returns
    -------
    float
        Negative log-likelihood, or ``inf`` when the parameters give no valid
        (positive, finite) variance, so a minimizer steers away from them.
    """
    series = np.asarray(M if data is None else data, dtype=float)
    h = dt if step is None else step
    beta = params[0]
    theta = params[1]
    sigma = params[2]
    if beta == 0:
        return np.inf
    alpha = exp(-beta * h)
    eta = (sigma**2) * (1 - exp(-2 * beta * h)) / (2 * beta)
    if not eta > 0:  # also catches NaN
        return np.inf
    # Residuals for EVERY transition.  Indexing with a leftover scalar loop
    # variable would sum a single term and let sigma collapse towards zero.
    resid = series[1:] - series[:-1] * alpha - theta * (1 - alpha)
    n_steps = resid.size
    LL = (-n_steps / 2.0 * log(2 * pi)
          - n_steps * log(sqrt(eta))
          - np.sum(resid**2) / (2 * eta))
    if not np.isfinite(LL):
        return np.inf
    return -LL
# Starting guess in the order mle() unpacks it: (beta, theta, sigma).
initParams = [1, 1, 1]
res = minimize(mle, x0=initParams, method='nelder-mead')
print(res.x)
What I am getting is this:-
runfile('/Users/achalawasthi/Desktop/testmle.py', wdir='/Users/achalawasthi/Desktop')
[ 1.84035906e+00 7.41336913e-01 2.00523821e-23]
As you can see, the estimate of sigma is way off. I do not understand why this is happening ? My intuition is that there is some instability and that sigma just goes away. Would putting some bounds on sigma help?
Thanks

Density of multivariate t distribution in Python for large number of observations

I am trying to evaluate the density of multivariate t distribution of a 13-d vector. Using the dmvt function from the mvtnorm package in R, the result I get is
[1] 1.009831e-13
When i tried to write the function by myself in Python (thanks to the suggestions in this post:
multivariate student t-distribution with python), I realized that the gamma function was taking very high values (given the fact that I have n=7512 observations), making my function going out of range.
I tried to modify the algorithm, using the math.lgamma() and np.linalg.slogdet() functions to transform it to the log scale, but the result I got was
8.97669876e-15
This is the function that I used in python is the following:
def dmvt(x, mu, Sigma, df, d):
    '''
    Multivariate Student-t density, evaluated on the log scale.

    output:
    the density of the given element
    input:
    x = parameter (d dimensional numpy array or scalar)
    mu = mean (d dimensional numpy array or scalar)
    Sigma = scale matrix (dxd numpy array)
    df = degrees of freedom
    d: dimension
    '''
    import math

    diff = np.atleast_1d(np.asarray(x, dtype=float) - mu)
    Sigma = np.atleast_2d(Sigma)
    # Every division uses a float divisor: on Python 2, `1/2*logdet` is 0 and
    # `d/2` truncates for odd d, which silently changes the density.
    log_num = math.lgamma((d + df) / 2.0) - math.lgamma(df / 2.0)
    (sign, logdet) = np.linalg.slogdet(Sigma)
    # Mahalanobis distance via a linear solve rather than an explicit inverse.
    maha = np.dot(diff, np.linalg.solve(Sigma, diff))
    log_denom = (0.5 * logdet
                 + d / 2.0 * (np.log(np.pi) + np.log(df))
                 + (d + df) / 2.0 * np.log1p(maha / df))
    return np.exp(log_num - log_denom)
Any ideas why this function does not produce the same results as the R equivalent?
Using x = (0,0) produces similar results (up to a point, due to rounding), but with x = (1,1) I get a significant difference!
I finally managed to 'translate' the code from the mvtnorm package in R and the following script works without numerical underflows.
import numpy as np
import scipy.stats
import math
from math import lgamma
from numpy import matrix
from numpy import linalg
from numpy.linalg import slogdet
import scipy.special
from scipy.special import gammaln
# Example inputs for the dmvt() calls at the end of this snippet.
mu = np.array([3,3])  # location vector
x = np.array([1, 1])  # point at which the density is evaluated
Sigma = np.array([[1, 0], [0, 1]])  # scale matrix (2 x 2 identity)
p=2  # not read by dmvt(), which takes its own p from Sigma.shape[0]
df=1  # degrees of freedom
def dmvt(x, mu, Sigma, df, log):
    '''
    Multivariate t-student density. Returns the density
    of the function at points specified by x.

    input:
    x = parameter (d dimensional numpy array, or n x d numpy array)
    mu = mean (d dimensional numpy array)
    Sigma = scale matrix (d x d numpy array)
    df = degrees of freedom
    log = log scale or not

    output:
    a scalar for a single point, or a length-n array for n x d input
    '''
    p = Sigma.shape[0]  # Dimensionality
    dec = np.linalg.cholesky(Sigma)
    # Centre first, THEN transpose: (x - mu) broadcasts over rows, whereas
    # transpose(x) - mu only works for a single point.
    R_x_m = np.linalg.solve(dec, np.transpose(np.asarray(x) - mu))
    rss = np.power(R_x_m, 2).sum(axis=0)
    # np.log1p (not math.log1p) so that a whole batch of rss values is accepted;
    # float divisors keep p/2 exact for odd p on Python 2.
    logretval = (lgamma((p + df) / 2.0)
                 - (lgamma(df / 2.0)
                    + np.sum(np.log(dec.diagonal()))
                    + p / 2.0 * np.log(math.pi * df))
                 - 0.5 * (df + p) * np.log1p(rss / df))
    if log:
        return logretval
    return np.exp(logretval)
# Print the log-density first, then the density itself.
for log_scale in (True, False):
    print(dmvt(x, mu, Sigma, df, log_scale))

Python lognorm.cdf vs. formula based implementation not matching

Okay I am converting the scipy.stats.lognorm.cdf function over to a Cython function and using the formula here: http://www.cs.unitn.it/~taufer/SR/P-LN.pdf as 1/2 + 1/2 * erf((ln(x) - mu) / (sigma * sqrt(2))). The results don't match, despite many other references to the same function online. EDIT: just fixed, only had to apply np.log to mu twice (once when passing the argument and once inside the function). Fixed code:
import numpy as np
from scipy.stats import lognorm
from scipy.special import erf
def lognormcdf(x, mu, sigma):
    """Return the log-normal CDF at ``x``.

    Parameters
    ----------
    x : scalar or array-like
        Evaluation point(s).  The CDF is 0 for ``x <= 0``.
    mu : float
        Scale of the distribution; the function takes ``np.log(mu)`` itself,
        so this corresponds to ``scale`` in ``scipy.stats.lognorm``.
    sigma : float
        Shape (``s`` in ``scipy.stats.lognorm``).
    """
    x = np.asarray(x, dtype=float)
    nonpositive = x <= 0
    # Substitute a harmless value where x <= 0 so np.log raises no warnings;
    # those entries are overwritten with 0 below.
    safe_x = np.where(nonpositive, 1.0, x)
    z = (np.log(safe_x) - np.log(mu)) / (np.sqrt(2.0) * sigma)
    result = np.where(nonpositive, 0.0, 0.5 + 0.5 * erf(z))
    return result[()]  # unwraps a 0-d result to a scalar, leaves arrays as-is
mu = 3.85
sigma = 0.346
# Negative half of the 100 sample points, in ascending order.
_negative_nodes = [
    -9.997137267734412802e-01, -9.984919506395958377e-01,
    -9.962951347331251428e-01, -9.931249370374434227e-01,
    -9.889843952429917540e-01, -9.838775407060570410e-01,
    -9.778093584869183008e-01, -9.707857757637063933e-01,
    -9.628136542558155542e-01, -9.539007829254917414e-01,
    -9.440558701362560257e-01, -9.332885350430795146e-01,
    -9.216092981453339883e-01, -9.090295709825296777e-01,
    -8.955616449707269888e-01, -8.812186793850184108e-01,
    -8.660146884971646752e-01, -8.499645278795913139e-01,
    -8.330838798884008245e-01, -8.153892383391762033e-01,
    -7.968978923903144995e-01, -7.776279096494954635e-01,
    -7.575981185197071532e-01, -7.368280898020207470e-01,
    -7.153381175730564312e-01, -6.931491993558019926e-01,
    -6.702830156031409636e-01, -6.467619085141292912e-01,
    -6.226088602037077591e-01, -5.978474702471787694e-01,
    -5.725019326213811599e-01, -5.465970120650941455e-01,
    -5.201580198817630230e-01, -4.932107892081909473e-01,
    -4.657816497733580086e-01, -4.378974021720314913e-01,
    -4.095852916783015440e-01, -3.808729816246299582e-01,
    -3.517885263724216949e-01, -3.223603439005291449e-01,
    -2.926171880384719759e-01, -2.625881203715034751e-01,
    -2.323024818449739570e-01, -2.017898640957360157e-01,
    -1.710800805386032686e-01, -1.402031372361139672e-01,
    -1.091892035800611088e-01, -7.806858281343663497e-02,
    -4.687168242159163445e-02, -1.562898442154308370e-02,
]
# The positive half is the mirror image of the negative half.  It is rebuilt
# from the intact half because the pasted literal was damaged there: one value
# had lost its leading digit and another was split in the middle of its exponent.
x = _negative_nodes + [-v for v in reversed(_negative_nodes)]
# Both calls receive log(mu) as their scale, so they share one parameterisation.
scipycdf = lognorm.cdf(x, s=sigma, scale=np.log(mu))
mycdf = lognormcdf(x, np.log(mu), sigma)
# Total difference between the hand-written CDF (NaNs mapped to 0) and SciPy's;
# this is the value shown under "Results" below.
np.sum(np.nan_to_num(mycdf) - scipycdf)
Results:
1.2011928779531548e-15
The original post was edited to reflect the correct formula.
def lognormcdf(x, mu, sigma):
    """Return the log-normal CDF at ``x``.

    ``mu`` is the scale of the distribution (the function applies ``np.log``
    to it), ``sigma`` is the shape.  The result is 0 for ``x <= 0``.
    """
    x = np.asarray(x, dtype=float)
    nonpositive = x <= 0
    # Avoid log warnings for x <= 0; those entries are set to 0 afterwards.
    safe_x = np.where(nonpositive, 1.0, x)
    z = (np.log(safe_x) - np.log(mu)) / (np.sqrt(2.0) * sigma)
    result = np.where(nonpositive, 0.0, 0.5 + 0.5 * erf(z))
    return result[()]  # unwraps a 0-d result to a scalar, leaves arrays as-is
Pass np.log(mu) in for mu and it works.

Plotting elliptical orbits

I'm trying to write a code that plots the elliptical paths of an object using the equation for the ellipse r=a(1-e^2)/(1+e*cos(theta)). I'd also like this data to be put into an array for other use.
from numpy import *#Imports Python mathematical functions library
import matplotlib.pyplot as plt #Imports plot library
from pylab import *
a = 5
e = 0.3
# Build all angles up front, one sample per degree with both end points
# included, instead of accumulating theta += pi/180 (float drift makes the
# final point unreliable).
theta = linspace(0, 2 * pi, 361)
r = (a * (1 - e**2)) / (1 + e * cos(theta))
for r_k, theta_k in zip(r, theta):
    print("r = ", r_k, "theta = ", theta_k)
# Plot once with the full arrays: calling plt.polar for one point at a time
# draws a separate single-point line each time, which shows nothing.
plt.polar(theta, r)
plt.show()
The code spits out correct values for r and theta, but the plot is blank. The polar plot window appears, but there is nothing plotted.
Please help. Thanks in advance.
Do not call plt.polar once for every point. Instead, call it once, with all the data as input:
import numpy as np #Imports Python mathematical functions library
import matplotlib.pyplot as plt #Imports plot library
# Short aliases used by the formula below.
cos = np.cos
pi = np.pi
a = 5
e = 0.3
theta = np.linspace(0, 2 * pi, num=360)
r = a * (1 - e**2) / (1 + e * cos(theta))
plt.polar(theta, r)
# Two-column table: r in the first column, theta in the second.
print(np.column_stack((r, theta)))
plt.show()
By the way, numpy can do the calculation as a two-liner, instead of using a while-loop:
theta = np.linspace(0,2*pi, 360) # 360 equally spaced values between 0 and 2*pi
r = (a*(1-e**2))/(1+e*cos(theta))  # evaluated element-wise, one radius per angle
This defines theta and r as numpy arrays (rather than single values).
I think you need to do points.append([theta,r]) then at the end plt.polar(points) ... that makes a kinda neat design too
from numpy import *#Imports Python mathematical functions library
import matplotlib.pyplot as plt #Imports plot library
from pylab import *
a = 5
e = 0.3
theta = 0
points = []  # collected (theta, r) pairs
while theta <= 2*pi:
    r = (a*(1-e**2))/(1+e*cos(theta))
    print("r = ",r,"theta = ",theta)
    points.append((theta, r))
    theta += pi/180
#plt.polar(points) #this is cool but probably not what you want
# zip(*points) splits the pairs back into one theta sequence and one r sequence.
plt.polar(*zip(*points))
plt.show()

Categories

Resources