I am trying to use scipy.odr to create a regression of data with uncertainty in both the x-data and the y-data. I have read that I need to use the attribute output.sd_beta. However, I seem to get some weird results.
The following code shows that sd_beta returns zero if the uncertainty of the data is zero, even though the data is noisy, while np.sqrt(np.diag(regression.cov_beta)) does the exact opposite. I think I have to add the uncertainty from a noisy signal to the uncertainty of the data as follows: uPopt = np.sqrt(np.diag(regression.cov_beta)) + output.sd_beta, but I am unsure. Can anyone please confirm or deny my gut feeling?
import numpy as np
import scipy.odr as odr

def lin(B, x):
    b = B[0]
    return b + 0 * x

def odrWrapper(description, x, y, sx, sy):
    # Function to create a regression using ODR and print the output
    data = odr.RealData(x, y, sx, sy)
    regression = odr.ODR(data, odr.Model(lin), beta0=[1])
    regression = regression.run()
    popt = regression.beta
    cov_beta = np.sqrt(np.diag(regression.cov_beta))
    sd_beta = regression.sd_beta
    print(description, popt, sd_beta, cov_beta)
# constants
b = 50
n = 10000
noiseScale = 10
uncert = 1
np.random.seed(0)

# no noise, no uncertainty
x = np.linspace(0, 100, n)
y = np.ones(n) * b
sx = [1e-10] * n  # very small value, as the uncertainty cannot be zero
sy = [1e-10] * n  # very small value, as the uncertainty cannot be zero
odrWrapper('No noise no uncertainty: ', x, y, sx, sy)
>> No noise no uncertainty: [50.] [0.] [1.e-12]

# noise but no uncertainty
x = np.linspace(0, 100, n)
y = np.ones(n) * b
y += noiseScale * (2 * np.random.rand(n) - 1)
sx = [1e-10] * n
sy = [1e-10] * n
odrWrapper('Noise but no uncertainty: ', x, y, sx, sy)
>> Noise but no uncertainty: [49.92917783] [0.05792112] [1.e-12]

# no noise but uncertainty
x = np.linspace(0, 100, n)
y = np.ones(n) * b
sx = [1e-10] * n
sy = [uncert] * n
odrWrapper('No noise but uncertainty: ', x, y, sx, sy)
>> No noise but uncertainty: [50.] [0.] [0.01]

# noise and uncertainty
x = np.linspace(0, 100, n)
y = np.ones(n) * b
y += noiseScale * (2 * np.random.rand(n) - 1)
sx = [1e-10] * n
sy = [1] * n
odrWrapper('Noise and uncertainty: ', x, y, sx, sy)
>> Noise and uncertainty: [49.90479242] [0.05826096] [0.01]
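One relationship worth checking before adding the two numbers together: as far as I can tell, sd_beta already folds the residual variance of the fit into the covariance diagonal, i.e. sd_beta should equal np.sqrt(np.diag(cov_beta) * res_var), so cov_beta reflects only the supplied data weights while sd_beta is rescaled by how well the model actually fits the noisy data. Below is a minimal, self-contained sketch to verify this on your own setup (please confirm against your SciPy version; the data just mimics the "noise and uncertainty" case above):

import numpy as np
import scipy.odr as odr

def lin(B, x):
    return B[0] + 0 * x

rng = np.random.default_rng(0)
n = 10000
x = np.linspace(0, 100, n)
y = 50 + 10 * (2 * rng.random(n) - 1)                 # noisy constant signal
data = odr.RealData(x, y, sx=[1e-10] * n, sy=[1.0] * n)
out = odr.ODR(data, odr.Model(lin), beta0=[1]).run()

print(out.sd_beta)                                     # parameter standard errors
print(np.sqrt(np.diag(out.cov_beta) * out.res_var))    # should agree with sd_beta

If that relationship holds for you, the quantity you want is sd_beta itself rather than the sum of the two.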
I am attempting to plot the nullcline (steady-state) curves of the Oregonator model to assert the existence of a limit cycle by applying the Poincaré-Bendixson theorem. I am close, but for some reason the plot that is produced shows two straight lines. I think it has something to do with the plotting stage. Any ideas?
Also, any hints on how to construct a quadrilateral to apply the theorem would be most appreciated.
Code:
import numpy as np
import matplotlib.pyplot as plt

# Dimensionless parameters
eps = 0.04
q = 0.0008
f = 1

# Oregonator model as numpy array
def Sys(Y, t=0):
    return np.array((Y[0] * (1 - Y[0] - ((Y[0] - q) * f * Y[1]) / (Y[0] + q)) / eps, Y[0] - Y[1]))

# Oregonator model steady states
def g(x, z):
    return (x * (1 - x) + ((q - x) * f * z) / (q + x)) / eps

def h(x, z):
    return x - z

# Initial lists containing values
x = []
z = []

def sys(iv1, iv2, dt, time):
    # initial values:
    x.append(iv1)
    z.append(iv2)
    # Compute and fill lists
    for i in range(time):
        x.append(x[i] + (g(x[i], z[i])) * dt)
        z.append(z[i] + (h(x[i], z[i])) * dt)
    return x, z

sys(1, 0.5, 0.01, 30)

# Locate and find equilibrium points
eqp = []

def find_fixed_points(r):
    for x in range(r):
        for z in range(r):
            if (g(x, z) == 0) and (h(x, z) == 0):
                eqp.append((x, z))
    return eqp

# Plot nullclines
plt.plot([0, 2], [2, 0], 'r-', lw=2, label='x-nullcline')
plt.plot([1, 1], [0, 2], 'b-', lw=2, label='z-nullcline')

# Plot equilibrium points
for point in eqp:
    plt.plot(point[0], point[1], "red", marker="o", markersize=10.0)

plt.legend(loc='best')

x = np.linspace(0, 2, 20)
z = np.linspace(0, 2, 20)
X1, Z1 = np.meshgrid(x, z)   # Create a grid
DX1, DZ1 = Sys([X1, Z1])     # Compute reaction rate on the grid
M = np.hypot(DX1, DZ1)       # Norm of the reaction rate
M[M == 0] = 1.               # Avoid zero-division errors
DX1 /= M                     # Normalise each arrow
DZ1 /= M

plt.quiver(X1, Z1, DX1, DZ1, M, pivot='mid')
plt.xlabel("x(\u03C4)")
plt.ylabel("z(\u03C4)")
plt.legend()
plt.grid()
plt.show()
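A possible reason for the two straight lines (my guess, since the question points at the plotting stage): the two plt.plot calls above draw literal line segments rather than the curves on which g and h vanish. A minimal sketch under that assumption, drawing the zero level sets of g and h with plt.contour instead (the grid ranges are illustrative):

import numpy as np
import matplotlib.pyplot as plt

eps, q, f = 0.04, 0.0008, 1

def g(x, z):
    return (x * (1 - x) + ((q - x) * f * z) / (q + x)) / eps

def h(x, z):
    return x - z

xx = np.linspace(0, 2, 400)
zz = np.linspace(0, 2, 400)
X1, Z1 = np.meshgrid(xx, zz)

# The nullclines are the zero level sets of g and h, plotted as implicit curves.
plt.contour(X1, Z1, g(X1, Z1), levels=[0], colors='r')
plt.contour(X1, Z1, h(X1, Z1), levels=[0], colors='b')
plt.xlabel("x(\u03C4)")
plt.ylabel("z(\u03C4)")
plt.show()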
I am trying to implement gradient descent in Python. Though my code returns a result, I think the results I am getting are completely wrong.
Here is the code I have written:
import numpy as np
import pandas

dataset = pandas.read_csv('D:\ML Data\house-prices-advanced-regression-techniques\\train.csv')

X = np.empty((0, 1), int)
Y = np.empty((0, 1), int)
for i in range(dataset.shape[0]):
    X = np.append(X, dataset.at[i, 'LotArea'])
    Y = np.append(Y, dataset.at[i, 'SalePrice'])
X = np.c_[np.ones(len(X)), X]
Y = Y.reshape(len(Y), 1)

def gradient_descent(X, Y, theta, iterations=100, learningRate=0.000001):
    m = len(X)
    for i in range(iterations):
        prediction = np.dot(X, theta)
        theta = theta - (1/m) * learningRate * (X.T.dot(prediction - Y))
    return theta

theta = np.random.randn(2, 1)
theta = gradient_descent(X, Y, theta)
print('theta', theta)
The result I get after running this program is:
theta [[-5.23237458e+228]
[-1.04560188e+233]]
These are very high values. Can someone point out the mistake I have made in the implementation?
Also, a second problem is that I have to set the learning rate very low (in this case 0.000001) for it to work; otherwise the program throws an error.
Please help me diagnose the problem.
Try reducing the learning rate as the iterations progress; otherwise it won't be able to reach the minimum. Try this:
import numpy as np
import pandas

dataset = pandas.read_csv('start.csv')

X = np.empty((0, 1), int)
Y = np.empty((0, 1), int)
for i in range(dataset.shape[0]):
    X = np.append(X, dataset.at[i, 'R&D Spend'])
    Y = np.append(Y, dataset.at[i, 'Profit'])
X = np.c_[np.ones(len(X)), X]
Y = Y.reshape(len(Y), 1)

def gradient_descent(X, Y, theta, iterations=50, learningRate=0.01):
    m = len(X)
    for i in range(iterations):
        prediction = np.dot(X, theta)
        theta = theta - (1/m) * learningRate * (X.T.dot(prediction - Y))
        learningRate /= 10
    return theta

theta = np.random.randn(2, 1)
theta = gradient_descent(X, Y, theta)
print('theta', theta)
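For what it is worth, another factor that may be at play (this is an assumption on my part, not something stated in the post): with raw LotArea values in the tens of thousands, the gradient of the squared error is enormous, which is typically why the learning rate has to be pushed down to 1e-6 to avoid overflow. Standardizing the feature usually lets an ordinary learning rate converge. A minimal sketch of that idea, using synthetic data so it runs on its own (the numbers are illustrative, not the Kaggle set):

import numpy as np

def gradient_descent(X, Y, theta, iterations=1000, learningRate=0.1):
    m = len(X)
    for i in range(iterations):
        prediction = np.dot(X, theta)
        theta = theta - (1 / m) * learningRate * (X.T.dot(prediction - Y))
    return theta

# Synthetic data shaped like the original problem (illustrative only).
rng = np.random.default_rng(0)
lot_area = rng.uniform(1_000, 50_000, size=500)
sale_price = 50_000 + 2.5 * lot_area + rng.normal(0, 10_000, size=500)

# Standardize the feature so a "normal" learning rate converges.
x_std = (lot_area - lot_area.mean()) / lot_area.std()
X = np.c_[np.ones(len(x_std)), x_std]
Y = sale_price.reshape(-1, 1)

theta = gradient_descent(X, Y, np.zeros((2, 1)))
print(theta)   # intercept and slope in standardized-feature units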
I am trying to write a function for a Gibbs sampler in the Bayesian framework. I got the code from this [website][1], which is a straightforward regression model. However, I am tackling a more complicated model: y = beta0 + beta1*x + x^gamma * sigma * epsilon, where sigma is the variance of the model. That means I need to estimate p(beta0 | y, x, beta1, sigma, gamma) and so on in the Gibbs sampler method. My question is how I should modify the code to sample beta0, beta1, and the other variables, as there are extra variables to condition on.
My code is:
import numpy as np
import pymc as pm
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

plt.rcParams['figure.figsize'] = (10, 5)

# installed with: conda install -c conda-forge pymc3=3.0
def sample_beta_0(y, x, beta_1, sigma, gamma, mu_0, tau_0):
    N = len(y)
    assert len(x) == N
    tau_i = 1 / ((x**gamma) * sigma)**2
    precision = tau_0 + sum(tau_i)
    mean = tau_0 * mu_0 + np.sum((y - beta_1 * x) * tau_i)
    mean /= precision
    return np.random.normal(mean, 1 / np.sqrt(precision))

def sample_beta_1(y, x, beta_0, sigma, mu_1, sigma_1):
    N = len(y)
    assert len(x) == N
    precision = sigma_1 + sigma * np.sum(x * x)
    mean = sigma_1 * mu_1 + sigma * np.sum((y - beta_0) * x)
    mean /= precision
    return np.random.normal(mean, 1 / np.sqrt(precision))

def sample_sigma(y, x, beta_0, beta_1, alpha, beta):
    N = len(y)
    alpha_new = alpha + N / 2
    resid = y - beta_0 - beta_1 * x
    beta_new = beta + np.sum(resid * resid) / 2
    return np.random.gamma(alpha_new, 1 / beta_new)
beta_0_true = -1
beta_1_true = 2
sigma_true = 1
N = 50
x = np.random.uniform(low=0, high=4, size=N)
y = np.random.normal(beta_0_true + beta_1_true * x, 1 / np.sqrt(sigma_true))
synth_plot = plt.plot(x, y, "o")
plt.xlabel("x")
plt.ylabel("y")
# print('Y are', y)
# print('X are', x)
plt.show()
"""GIBBS Sampler"""
# specify initial values
init = {"beta_0": 0,
"beta_1": 0,
"sigma": 2}
# specify hyper parameters
hypers = {"mu_0": 0,
"sigma_0": 1,
"mu_1": 0,
"sigma_1": 1,
"alpha": 2,
"beta": 1}
def gibbs(y, x, iters, init, hypers):
    assert len(y) == len(x)
    beta_0 = init["beta_0"]
    beta_1 = init["beta_1"]
    sigma = init["sigma"]
    trace = np.zeros((iters, 3))  # trace to store values of beta_0, beta_1, sigma
    for it in range(iters):
        beta_0 = sample_beta_0(y, x, beta_1, sigma, hypers["mu_0"], hypers["sigma_0"])
        beta_1 = sample_beta_1(y, x, beta_0, sigma, hypers["mu_1"], hypers["sigma_1"])
        sigma = sample_sigma(y, x, beta_0, beta_1, hypers["alpha"], hypers["beta"])
        trace[it, :] = np.array((beta_0, beta_1, sigma))
    trace = pd.DataFrame(trace)
    trace.columns = ['beta_0', 'beta_1', 'sigma']
    print(trace)
    return trace
iters = 1000
trace = gibbs(y, x, iters, init, hypers)
traceplot = trace.plot()
traceplot.set_xlabel("Iteration")
traceplot.set_ylabel("Parameter value")
trace_burnt = trace[500:999]
hist_plot = trace_burnt.hist(bins = 30, layout = (1,3))
print(trace_burnt.median())
print(trace_burnt.std())
I know it is really long, but please help!
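This is not from the original post, but one common way to handle the extra parameter: once the x^gamma term enters the variance, the full conditional for gamma has no standard form, so a Metropolis step can be used for it inside the Gibbs sweep (Metropolis-within-Gibbs). Below is a minimal sketch under those assumptions, treating sigma as the noise scale in y = beta0 + beta1*x + x^gamma * sigma * epsilon and putting a flat prior on gamma (the step size and helper names are illustrative):

import numpy as np

def log_lik(y, x, beta_0, beta_1, sigma, gamma):
    # log-likelihood of y_i ~ Normal(beta_0 + beta_1*x_i, ((x_i**gamma) * sigma)**2)
    sd = (x ** gamma) * sigma
    resid = y - beta_0 - beta_1 * x
    return np.sum(-np.log(sd) - 0.5 * (resid / sd) ** 2)

def sample_gamma(y, x, beta_0, beta_1, sigma, gamma, step=0.1):
    # random-walk Metropolis step for gamma (flat prior assumed)
    proposal = gamma + step * np.random.randn()
    log_accept = (log_lik(y, x, beta_0, beta_1, sigma, proposal)
                  - log_lik(y, x, beta_0, beta_1, sigma, gamma))
    if np.log(np.random.rand()) < log_accept:
        return proposal
    return gamma

Inside the gibbs loop, gamma would then be updated each sweep with gamma = sample_gamma(y, x, beta_0, beta_1, sigma, gamma) and passed on to the other conditional samplers, whose means and precisions are reweighted by 1/((x**gamma)*sigma)**2 in the same way sample_beta_0 already does.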
I'm trying to produce 2D Perlin noise using numpy, but instead of something smooth I get this:
[image: my broken Perlin noise, with ugly squares everywhere]
For sure, I'm mixing up my dimensions somewhere, probably when I combine the four gradients... but I can't find it and my brain is melting right now. Can anyone help me pinpoint the problem?
Anyway, here is the code:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

def perlin(x, y, seed=0):
    # permutation table
    np.random.seed(seed)
    p = np.arange(256, dtype=int)
    np.random.shuffle(p)
    p = np.stack([p, p]).flatten()
    # coordinates of the first corner
    xi = x.astype(int)
    yi = y.astype(int)
    # internal coordinates
    xf = x - xi
    yf = y - yi
    # fade factors
    u = fade(xf)
    v = fade(yf)
    # noise components
    n00 = gradient(p[p[xi]+yi], xf, yf)
    n01 = gradient(p[p[xi]+yi+1], xf, yf-1)
    n11 = gradient(p[p[xi+1]+yi+1], xf-1, yf-1)
    n10 = gradient(p[p[xi+1]+yi], xf-1, yf)
    # combine noises
    x1 = lerp(n00, n10, u)
    x2 = lerp(n10, n11, u)
    return lerp(x2, x1, v)

def lerp(a, b, x):
    "linear interpolation"
    return a + x * (b - a)

def fade(t):
    "6t^5 - 15t^4 + 10t^3"
    return 6 * t**5 - 15 * t**4 + 10 * t**3

def gradient(h, x, y):
    "grad converts h to the right gradient vector and return the dot product with (x,y)"
    vectors = np.array([[0, 1], [0, -1], [1, 0], [-1, 0]])
    g = vectors[h % 4]
    return g[:, :, 0] * x + g[:, :, 1] * y

lin = np.linspace(0, 5, 100, endpoint=False)
y, x = np.meshgrid(lin, lin)
plt.imshow(perlin(x, y, seed=0))
Thanks to Paul Panzer and a good night of sleep, it works now...
import numpy as np
import matplotlib.pyplot as plt

def perlin(x, y, seed=0):
    # permutation table
    np.random.seed(seed)
    p = np.arange(256, dtype=int)
    np.random.shuffle(p)
    p = np.stack([p, p]).flatten()
    # coordinates of the top-left
    xi, yi = x.astype(int), y.astype(int)
    # internal coordinates
    xf, yf = x - xi, y - yi
    # fade factors
    u, v = fade(xf), fade(yf)
    # noise components
    n00 = gradient(p[p[xi] + yi], xf, yf)
    n01 = gradient(p[p[xi] + yi + 1], xf, yf - 1)
    n11 = gradient(p[p[xi + 1] + yi + 1], xf - 1, yf - 1)
    n10 = gradient(p[p[xi + 1] + yi], xf - 1, yf)
    # combine noises
    x1 = lerp(n00, n10, u)
    x2 = lerp(n01, n11, u)  # FIX1: I was using n10 instead of n01
    return lerp(x1, x2, v)  # FIX2: I also had to reverse x1 and x2 here

def lerp(a, b, x):
    "linear interpolation"
    return a + x * (b - a)

def fade(t):
    "6t^5 - 15t^4 + 10t^3"
    return 6 * t**5 - 15 * t**4 + 10 * t**3

def gradient(h, x, y):
    "grad converts h to the right gradient vector and return the dot product with (x,y)"
    vectors = np.array([[0, 1], [0, -1], [1, 0], [-1, 0]])
    g = vectors[h % 4]
    return g[:, :, 0] * x + g[:, :, 1] * y

lin = np.linspace(0, 5, 100, endpoint=False)
x, y = np.meshgrid(lin, lin)  # FIX3: I thought I had to invert x and y here, but it was a mistake
plt.imshow(perlin(x, y, seed=2), origin='upper')
I have two numpy arrays
X.shape = (100, 10)
Y.shape = (100, 10)
I want to find the Pearson correlations between the columns of X and Y, i.e.
from scipy.stats.stats import pearsonr

def corr(X, Y):
    return np.array([pearsonr(x, y)[0] for x, y in zip(X.T, Y.T)])

corr(X, Y).shape = (10, )
Is there a function for this? So far, all the functions I can find calculate correlation matrices. There is a pairwise correlation function in Matlab, so I'm pretty sure someone must have written one for Python.
The reason I don't like the example function above is that it seems slow.
If columns are variables and rows are observations in X, Y (and you would like to find column-wise correlations between X and Y):
X = (X - X.mean(axis=0)) / X.std(axis=0)
Y = (Y - Y.mean(axis=0)) / Y.std(axis=0)
pearson_r = np.dot(X.T, Y) / X.shape[0]
To find the p-value, convert the pearson_r to t statistics:
t = pearson_r * np.sqrt(X.shape[0] - 2) / np.sqrt(1 - pearson_r ** 2)
and the two-sided p-value is 2 × P(T > |t|).
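For completeness, here is a small self-contained sketch of that conversion using scipy.stats.t (synthetic data and variable names are illustrative). Note that np.dot(X.T, Y) / n gives the full 10 × 10 cross-correlation matrix, whose diagonal holds the pairwise column correlations; the sketch computes those pairwise values directly:

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
X = rng.random((100, 10))
Y = rng.random((100, 10))

n = X.shape[0]
Xs = (X - X.mean(axis=0)) / X.std(axis=0)
Ys = (Y - Y.mean(axis=0)) / Y.std(axis=0)
r = (Xs * Ys).mean(axis=0)                      # column-wise correlations, shape (10,)

t = r * np.sqrt(n - 2) / np.sqrt(1 - r ** 2)    # t statistics with n - 2 degrees of freedom
p = 2 * stats.t.sf(np.abs(t), df=n - 2)         # two-sided p-values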
I adapted this from scipy.stats.pearsonr:
import numpy as np
from scipy.stats import pearsonr

x = np.random.rand(100, 10)
y = np.random.rand(100, 10)

def corr(X, Y):
    return np.array([pearsonr(x, y)[0] for x, y in zip(X.T, Y.T)])

def pair_pearsonr(x, y, axis=0):
    mx = np.mean(x, axis=axis, keepdims=True)
    my = np.mean(y, axis=axis, keepdims=True)
    xm, ym = x - mx, y - my
    r_num = np.add.reduce(xm * ym, axis=axis)
    r_den = np.sqrt((xm * xm).sum(axis=axis) * (ym * ym).sum(axis=axis))
    r = r_num / r_den
    return r

np.allclose(pair_pearsonr(x, y, axis=0), corr(x, y))