I'd like to port some Python code for the stick-breaking process, which is one of the construction schemes for the Dirichlet process (DP), to R. However, the plot I get in R is quite different: the DP sample distributions do not cluster around the base distribution, H.
The reference Python code is from Austin Rochford's blog.
from matplotlib import pyplot as plt
import numpy as np
import pymc3 as pm
import scipy.stats as ss
import seaborn as sns
from statsmodels.datasets import get_rdataset
from theano import tensor as T
# Fix the seed so the draws below are reproducible.
np.random.seed(433)

N = 20       # number of independent DP draws
K = 30       # truncation level: atoms kept per draw
alpha = 50   # DP concentration parameter
H = ss.norm  # base dist

# Stick-breaking fractions, one row per draw.
beta = ss.beta.rvs(1, alpha, size=(N, K))

# Share of the stick still unbroken before each break: 1 for the first atom,
# then the running product of (1 - beta) along each row.
remaining = np.ones_like(beta)
remaining[:, 1:] = np.cumprod(1 - beta[:, :-1], axis=1)
pi = beta * remaining

# Atom locations, drawn from the base distribution.
omega = H.rvs(size=(N, K))

# Step-function CDF of every draw on a common grid: for each grid point, add
# up the weights of the atoms lying strictly below it.
x_plot = np.linspace(-3, 3, 200)
atom_is_below = omega[..., np.newaxis] < x_plot
sample_cdfs = (pi[..., np.newaxis] * atom_is_below).sum(axis=1)
# Overlay every sampled CDF in gray and the base CDF in black.
fig, ax = plt.subplots(figsize=(8, 6))
sample_style = {"c": "gray", "alpha": 0.75}
# Only the first sample curve carries a label, so the legend shows one entry.
ax.plot(x_plot, sample_cdfs[0], label="DP sample CDFs", **sample_style)
ax.plot(x_plot, sample_cdfs[1:].T, **sample_style)
ax.plot(x_plot, H.cdf(x_plot), c="k", label="Base CDF")
ax.set_title(r'$\alpha = {}$'.format(alpha))
ax.legend(loc=2)
The figure on the right side is the result in Python code.
And I tried to convert it to R code:
# Truncated stick-breaking draws from a Dirichlet process, plotted as CDFs
# against the standard-normal base CDF.
library(yarrr)
N=20;K=30;ngrid=200;alpha=50
xgrid = seq(-3,3,length.out=ngrid)
# N x K matrix of Beta(1, alpha) stick-breaking fractions (one row per draw).
betas = matrix(rbeta(N*K, 1, alpha),nr=N, nc=K)
# NOTE(review): cumprod() flattens the matrix column by column, so this is ONE
# running product over all N*K entries, cut to length K. On the next line that
# length-K vector is recycled down the columns of `betas`. The Python
# reference instead takes the cumulative product along each row
# (cumprod(axis=1)). This looks like the cause of the mismatch; a row-wise
# equivalent would be
#   t(apply(1 - betas, 1, function(b) c(1, cumprod(b))[1:K]))
stick.to.right = c(1, cumprod(1 - betas))[1:K]
pis.temp = stick.to.right * betas
# Atom locations drawn from the standard normal base distribution.
omega = matrix(rnorm(N*K),nr=N,nc=K)
# dirac[i,j,k] is 1 when atom j of draw i lies strictly below grid point k.
dirac = array(numeric(N*K*ngrid),dim=c(N,K,ngrid))
for(i in 1:N){
for(j in 1:K){
for(k in 1:ngrid){
dirac[i,j,k]=ifelse(omega[i,j]<xgrid[k],TRUE,FALSE)
}
}
}
# Repeat the N x K weights along the grid dimension (200 here equals ngrid).
pis = array(pis.temp,dim=c(N,K,200))
# Sum weight * indicator over the atoms to get one CDF per draw.
sample_cdfs = apply(pis* dirac,c(1,3),sum)
plot(xgrid,sample_cdfs[1,],col=piratepal("pony"),type="l",lwd=1,ylim=c(0,1))
# NOTE(review): assumes the "pony" palette has at least N colours; indexing
# past its length would give NA -- TODO confirm.
for(i in 2:N) lines(xgrid,sample_cdfs[i,],col=piratepal("pony")[i])
# Base CDF for comparison.
lines(xgrid,pnorm(xgrid),lwd=2)
The plot I drew is DP with alpha=50:
How can I modify R code to give a similar result as Python code?
Related
I am trying to plot this data as a decaying exponential, all of the data has the same x values just the y values differ. y= a*[(-1)*exp(-x/t)].
I am not getting the correct chart when it goes through (csv file). The image shows the type of curve I am looking for. I need to plot all of the data in the CSV file (preferably on the same plot) in PyCharm. I am relatively new to PyCharm, so I am starting from scratch! (Excel just wouldn't behave for this data.) I am willing to start fresh as well if there is a simpler way of writing the code; I pieced this together with some help from the internet.
import scipy.signal as scp
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import numpy.core.function_base
def decaying_exponential(x, a, t, c):
    """Return ``-a * exp(-x / t) + c``.

    Args:
        x: Independent variable (scalar or NumPy array).
        a: Amplitude of the exponential term.
        t: Decay constant dividing ``x``; must be non-zero.
        c: Constant offset added to the result.

    Returns:
        The model value(s), with the same shape as ``x``.
    """
    return a * (-1) * np.exp(-1 * (x) / t) + c
import os
# List the contents of the working folder (loop body re-indented; the pasted
# version had lost its indentation and did not parse).
for f in os.listdir("/Users/flyar/My Python Stuff/"):
    print(f)

# Load the CSV with four named columns and transpose so df[k] is column k.
df = numpy.transpose(pd.read_csv("D:/Grad Lab/NMR/Data/T1 Data/mineral oil/F0009CH1.CSV", names= ['a','b','c','d']).to_numpy())

# Indices of the positive peaks of column 2 (treated as the signal).
temp = scp.find_peaks(df[2], height = 0)
df_subset = [(df[1][n], df[2][n]) for n in temp[0]]
print(df_subset)

# NOTE(review): the scatter puts df[2] on the x axis and df[1] on the y axis,
# whereas the fit below uses df[1] as x and df[2] as y. One of the two is
# presumably swapped -- confirm which column is time.
plt.scatter([df[2][n] for n in temp[0]], [df[1][n] for n in temp[0]])
y = np.linspace(min(df[2]), max(df[2]), 1000)

# Fit the model to the peaks, skipping the first two.
params, covs = curve_fit(decaying_exponential, [df[1][n] for n in temp[0][2::]],
                         [df[2][n] for n in temp[0][2::]], maxfev=10000)
print(params)

# NOTE(review): the curve is drawn with a hard-coded amplitude of 5 instead of
# the fitted params[0], and is sampled over the range of df[2] rather than
# df[1]. Both look unintended -- confirm.
plt.plot(y, [decaying_exponential(l, 5, params[1], params[2]) for l in y])
plt.show()
I am trying to fit a quadratic function to some data, and I'm trying to do this without using numpy's polyfit function.
Mathematically I tried to follow this website https://neutrium.net/mathematics/least-squares-fitting-of-a-polynomial/ but somehow I don't think that I'm doing it right. If anyone could assist me that would be great, or If you could suggest another way to do it that would also be awesome.
What I've tried so far:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Three sample points; column 0 of A holds the x values, b the y values.
ones = np.ones(3)
A = np.array(((0, 1), (1, 1), (2, 1)))
xfeature = A.T[0]
squaredfeature = A.T[0] ** 2
b = np.array((1, 2, 0), ndmin=2).T
b = b.reshape(3)

# Square design matrix with columns [1, x, x**2].
features = np.concatenate((np.vstack(ones), np.vstack(xfeature), np.vstack(squaredfeature)), axis=1)
featuresc = features.copy()
print(features)
m_det = np.linalg.det(features)
print(m_det)

# Cramer's rule: coefficient i is det(features with column i replaced by b)
# divided by det(features). The loop body is re-indented here; the pasted
# version had lost its indentation and did not parse.
determinants = []
for i in range(3):
    featuresc.T[i] = b
    print(featuresc)
    det = np.linalg.det(featuresc)
    determinants.append(det)
    print(det)
    # Restore the untouched design matrix before replacing the next column.
    featuresc = features.copy()

# Convert explicitly so the division does not rely on list / NumPy-scalar
# coercion.
determinants = np.asarray(determinants) / m_det
print(determinants)
# Compare the Cramer's-rule parabola (solid) with numpy.polyfit (dashed blue).
x_points = A[:, 0]
plt.scatter(x_points, b)
u = np.linspace(0, 3, 100)
c0, c1, c2 = determinants
plt.plot(u, c2 * u**2 + c1 * u + c0)
p2 = np.polyfit(x_points, b, 2)
plt.plot(u, np.polyval(p2, u), 'b--')
plt.show()
As you can see, my curve doesn't compare well to numpy's polyfit curve.
Update:
I went through my code and removed all the stupid mistakes and now it works, when I try to fit it over 3 points, but I have no idea how to fit over more than three points.
This is the new code:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Three sample points; column 0 of A holds the x values, b the y values.
ones = np.ones(3)
A = np.array(((0, 1), (1, 1), (2, 1)))
xfeature = A.T[0]
squaredfeature = A.T[0] ** 2
b = np.array((1, 2, 0), ndmin=2).T
b = b.reshape(3)

# Square design matrix with columns [1, x, x**2]; Cramer's rule needs exactly
# as many points as unknown coefficients.
features = np.concatenate((np.vstack(ones), np.vstack(xfeature), np.vstack(squaredfeature)), axis=1)
featuresc = features.copy()
print(features)
m_det = np.linalg.det(features)
print(m_det)

# Coefficient i = det(features with column i replaced by b) / det(features).
# The loop body is re-indented here; the pasted version did not parse.
determinants = []
for i in range(3):
    featuresc.T[i] = b
    print(featuresc)
    det = np.linalg.det(featuresc)
    determinants.append(det)
    print(det)
    # Start again from the untouched design matrix for the next column.
    featuresc = features.copy()

# Convert explicitly so the division does not rely on list / NumPy-scalar
# coercion.
determinants = np.asarray(determinants) / m_det
print(determinants)
# Compare the Cramer's-rule parabola (solid) with numpy.polyfit (dashed red).
x_points = A[:, 0]
plt.scatter(x_points, b)
u = np.linspace(0, 3, 100)
c0, c1, c2 = determinants
plt.plot(u, c2 * u**2 + c1 * u + c0)
p2 = np.polyfit(x_points, b, 2)
plt.plot(u, np.polyval(p2, u), 'r--')
plt.show()
Instead of using Cramer's Rule, actually solve the system using least squares. Remember that Cramer's Rule will only work if the total number of points you have equals the desired order of the polynomial plus 1.
If you don't have this, then Cramer's Rule will not work as you're trying to find an exact solution to the problem. If you have more points, the method is unsuitable as we will create an overdetermined system of equations.
To adapt this to more points, numpy.linalg.lstsq is a better fit, as it solves Ax = b by computing the vector x that minimizes the Euclidean norm ||b - Ax||. Therefore, remove the y values from the last column of the features matrix and use numpy.linalg.lstsq to solve for the coefficients:
import numpy as np
import matplotlib.pyplot as plt
# Four sample points for a three-coefficient quadratic: the system is
# overdetermined, so it is solved in the least-squares sense.
ones = np.ones(4)
xfeature = np.asarray([0, 1, 2, 3])
squaredfeature = xfeature ** 2
b = np.asarray([1, 2, 0, 3])
# Design matrix with columns [1, x, x**2].
features = np.concatenate((np.vstack(ones), np.vstack(xfeature), np.vstack(squaredfeature)), axis=1)  # Change - remove the y values
# lstsq returns (solution, residuals, rank, singular values); keep the
# solution. rcond=None uses the machine-precision-based cutoff and avoids the
# FutureWarning that NumPy 1.14-1.x emits when rcond is omitted.
determinants = np.linalg.lstsq(features, b, rcond=None)[0]  # Change - use least squares
# Plot the data points and the fitted parabola c0 + c1*u + c2*u**2.
plt.scatter(xfeature, b)
u = np.linspace(0, 3, 100)
c0, c1, c2 = determinants
plt.plot(u, c2 * u**2 + c1 * u + c0)
plt.show()
I get this plot now, which matches what the dashed curve is in your graph, also matching what numpy.polyfit gives you:
I have clustered my data (12000, 3) using the sklearn Gaussian mixture model (GMM) algorithm. I have 3 clusters. Each point of my data represents a molecular structure. I would like to know how I could sample each cluster. I have tried with the function:
# Fit a three-component mixture to Data, then draw 20 samples from the fitted
# model as a whole.
gmm = GMM(n_components=3)
gmm = gmm.fit(Data)
gmm.sample(n_samples=20)
but it performs a sampling of the whole distribution, whereas I need a sample from each one of the components.
Well this is not that easy since you need to calculate the eigenvectors of all covariance matrices. Here is some example code for a problem I studied
import numpy as np
from scipy.stats import multivariate_normal
import random
from operator import truediv
import itertools
from scipy import linalg
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn import mixture
# Load the first two columns of the data file used for the GMM demo.
# unpack=True transposes the result, so mix[0] and mix[1] are the two columns.
mix = np.loadtxt("mixture.txt", usecols=(0, 1), unpack=True)

# Endless colour sequence, one colour per plotted component.
_palette = ['navy', 'c', 'cornflowerblue', 'gold', 'darkorange']
color_iter = itertools.cycle(_palette)
def plot_results(X, Y_, means, covariances, index, title):
    """Scatter the labelled points and overlay one ellipse per component.

    Args:
        X: 2-D array of points; columns 0 and 1 are plotted.
        Y_: Component label for each row of ``X``.
        means: Sequence of component means (ellipse centres).
        covariances: Sequence of component covariance matrices.
        index: Zero-based row of the 2x1 subplot grid to draw into.
        title: Title shown above the subplot.
    """
    splot = plt.subplot(2, 1, 1 + index)
    for i, (mean, covar, color) in enumerate(zip(
            means, covariances, color_iter)):
        # Eigen-decomposition of the covariance gives the ellipse axes:
        # eigenvalues set the axis lengths, an eigenvector the orientation.
        v, w = linalg.eigh(covar)
        v = 2. * np.sqrt(2.) * np.sqrt(v)
        u = w[0] / linalg.norm(w[0])
        # A component that no point was assigned to is redundant, so it is
        # not plotted.
        if not np.any(Y_ == i):
            continue
        plt.scatter(X[Y_ == i, 0], X[Y_ == i, 1], .8, color=color)

        # Plot an ellipse to show the Gaussian component.
        angle = np.arctan(u[1] / u[0])
        angle = 180. * angle / np.pi  # convert to degrees
        # angle is passed by keyword: newer matplotlib releases no longer
        # accept it positionally.
        ell = mpl.patches.Ellipse(mean, v[0], v[1], angle=180. + angle,
                                  color=color)
        ell.set_clip_box(splot.bbox)
        ell.set_alpha(0.5)
        splot.add_artist(ell)

    plt.xlim(-4., 3.)
    plt.ylim(-4., 2.)
    # The title argument was accepted but never used; show it.
    plt.title(title)
# Fit a full-covariance, three-component mixture; the samples are the rows of
# mix.T. Then print the predicted labels and plot them with plot_results.
samples = mix.T
gmm = mixture.GaussianMixture(n_components=3, covariance_type='full').fit(samples)
print(gmm.predict(samples))
plot_results(samples, gmm.predict(samples), gmm.means_, gmm.covariances_, 0,
             'Gaussian Mixture')
So for my problem the resulting plot looked like this:
Edit: here the answer to your comment. I would use pandas to do this. Assume X is your feature matrix and y are your labels, then
import pandas as pd
# Predicted component for every row of X. pd.concat only accepts pandas
# objects, so the NumPy result is wrapped in a Series aligned on X's index
# (assumes X is a DataFrame and y a Series/DataFrame sharing that index).
y_pred = pd.Series(gmm.predict(X), index=X.index, name='y_pred')
# Features, true labels and predicted labels side by side.
df_all_info = pd.concat([X, y, y_pred], axis=1)
In the resulting dataframe you can check all the information you want, you can even just exclude the samples the algorithm misclassified with:
# Rows where the true label and the predicted label disagree (replace the two
# placeholder column names with the real ones).
label_mismatch = df_all_info['name of y-column'] != df_all_info['name of y_pred column']
df_wrong = df_all_info[label_mismatch]
I have a function f(x,t) = cos(t)*t + x and i want to display the change of the result over the width x and time t at discretised time steps t_i and discretised width steps x_j.
I have been on SX for a while now and feel really embarrassed that I can only post so little code, or in other words nothing that works (since nothing I have tried has worked...):
Nevertheless if someone has the time to help, I`d appreciate it.
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import matplotlib.pyplot as pyplot
from astropy.io.ascii.latex import AASTex
def func(xi, ti):
    """Return ``cos(ti) * ti + xi``.

    Args:
        xi: Width value(s), scalar or NumPy array.
        ti: Time value(s), scalar or NumPy array broadcastable with ``xi``.

    Returns:
        The function value(s), broadcast over ``xi`` and ``ti``.
    """
    res = np.cos(ti)*ti + xi
    return res
# Time axis: timeSpacing samples on [timeStart, timeEnd].
timeSpacing = 100
timeStart = 0
timeEnd = 1
time = np.linspace(timeStart, timeEnd, timeSpacing)

# Width axis: widthSpacing samples on [widthStart, widthEnd].
widthSpacing = 300
widthStart = 0
widthEnd = 3
width = np.linspace(widthStart, widthEnd, widthSpacing)

resultList = [None]*timeSpacing
resultListInner = [None]*widthSpacing

# Loop bodies are re-indented here; the pasted version had lost its
# indentation and did not parse.
for i, ithTime in enumerate(time):
    for j, jthWidth in enumerate(width):
        # Array shaped like `width`, filled with the current time, so func
        # returns a whole array for each (time, width) pair.
        aas = np.zeros_like(width)
        aas.fill(ithTime)
        resultListInner[j] = ithTime, jthWidth, func(jthWidth, aas)
    # NOTE(review): every resultList[i] refers to the SAME resultListInner
    # object, so after the loops all rows show the values of the last time
    # step. Confirm whether a fresh inner list per time step was intended.
    resultList[i] = resultListInner
So how do I correctly index the list and array and plot my data using matplotlib?
My plot should look like this:
where, in my case, the aperture should be the width x, the sky annulus is my time t, and the RMS is my func(x,t).
A couple of points:
Numpy provides a very nice function for doing differences of array elements: diff
Matplotlib uses plot_wireframe for creating a plot that you would want (also using Numpy's meshgrid)
Now, combining these into what you may want would look something like this.
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import matplotlib.pyplot as plt
def func(xi, ti):
    """Return ``cos(ti) * sin(xi)``.

    Args:
        xi: Width value(s), scalar or NumPy array.
        ti: Time value(s), scalar or NumPy array broadcastable with ``xi``.

    Returns:
        The function value(s), broadcast over ``xi`` and ``ti``.
    """
    res = np.cos(ti)*np.sin(xi)
    return res
# Time axis: 20 samples on [0, 1].
timeStart, timeEnd, timeSpacing = 0, 1, 20
time = np.linspace(timeStart, timeEnd, timeSpacing)

# Width axis: 50 samples on [0, 3].
widthStart, widthEnd, widthSpacing = 0, 3, 50
width = np.linspace(widthStart, widthEnd, widthSpacing)

# Evaluate func on the full (time x width) grid in one vectorised call.
X, T = np.meshgrid(width, time)
F = func(X, T)

# Difference along time (axis 0) first, then along width (axis 1).
DF = np.diff(F, axis=0)
DF = np.diff(DF, axis=1)

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# Each diff drops one entry along its axis, so trim the grids to DF's shape.
ax.plot_wireframe(X[:-1, :-1], T[:-1, :-1], DF)
plt.show()
Note that diff is applied twice: once in each dimension axis= . I have also changed the toy function you provided to something that actually looks decent in this case.
For your more general use, it seems that you would want to just collect all of your F data into a 2D array, then proceed from the DF = line.
I need to plot the mathieu characteristic parameters for various q. The plot should show 'flute' shapes going from wide on the left, to very narrow on the right. The code below does this, but it also introduces a handful of inter-band jumps (obvious from the plotted figure). How can I fix this?
Thank you!
AM
import numpy as np
import scipy as sp
import scipy.special as spfun
from matplotlib import pyplot as plt
uplim = 120  # E_rec
Npts = 1000
Nstates = 8

# q grid from 0 to uplim/4, and the matching horizontal axis U = 4q.
q = np.linspace(0, uplim/4.0, Npts)
EA = np.zeros([Npts, Nstates])
EB = np.zeros([Npts, Nstates])
U = 4*q

# print() with parentheses runs on Python 3; the original used Python 2
# print statements.
print(np.shape(EA))

# Loop body re-indented; the pasted version had lost its indentation.
for i in range(Nstates):
    # Band i is filled between mathieu_a of order i and mathieu_b of order
    # i+1, both shifted by 2q.
    a = spfun.mathieu_a(i, q)
    b = spfun.mathieu_b(i+1, q)
    EA[:, i] = a + 2*q
    EB[:, i] = b + 2*q
    plt.fill_between(U, EA[:, i], EB[:, i])

print(np.shape(EA))
plt.show()
EDIT As DSM and pv have pointed out, this is a scipy bug. The glitches get worse as you go out further. What I ended up doing was exporting tables of values that I wanted from Mathematica, and importing them into python and interpolating. Not great, but works.
I tried computing this with the latest release of the NAG Library for Python which included a new Mathieu function routine.
I pushed a little harder -- more states and a higher value of uplim.
%matplotlib inline
import numpy as np
import scipy as sp
import scipy.special as spfun
from naginterfaces.library import specfun
from matplotlib import pyplot as plt
uplim = 150  # E_rec
Npts = 4000
Nstates = 10

# q grid from 0 to uplim/4, and the matching horizontal axis U = 4q.
q = np.linspace(0, uplim/4.0, Npts)
EA = np.zeros([Npts, Nstates])
EB = np.zeros([Npts, Nstates])
U = 4*q

plt.figure(figsize=(15, 8))

# Left panel: bands from SciPy's Mathieu characteristic values.
# (Loop bodies re-indented; the pasted version had lost its indentation.)
plt.subplot(1, 2, 1)
plt.title('Using SciPy')
for i in range(Nstates):
    a = spfun.mathieu_a(i, q)
    b = spfun.mathieu_b(i+1, q)
    EA[:, i] = a + 2*q
    EB[:, i] = b + 2*q
    plt.fill_between(U, EA[:, i], EB[:, i])

# Right panel: the same bands from the NAG routine, called once per q value.
# EA and EB are reused and overwritten column by column.
plt.subplot(1, 2, 2)
plt.title('Using NAG')
for i in range(Nstates):
    # NOTE(review): element [2] of the NAG return value is presumably the
    # characteristic value, and parity 0/1 presumably selects the even/odd
    # solution -- confirm against the NAG documentation.
    a = [specfun.mathieu_ang_periodic_real(ordval=i, q=qi, parity=0, mode=3)[2] for qi in q]
    b = [specfun.mathieu_ang_periodic_real(ordval=i+1, q=qi, parity=1, mode=3)[2] for qi in q]
    # list + ndarray broadcasts element-wise and yields an ndarray.
    EA[:, i] = a + 2*q
    EB[:, i] = b + 2*q
    plt.fill_between(U, EA[:, i], EB[:, i])

plt.show()
This uses Mark 27 of the NAG Library and version 1.2.1 of SciPy.