associativity of matrix multiplication in numpy

associativity of matrix multiplication in numpy - python

I am playing with a simple numpy example and having hard time to understand why associative property of matrix multiplication
ABC = (AB)C = A(BC)
does not exactly hold. I assume the problem is with numeric stability. But how to address it? What is the issue exactly?
Here is my example with linear regression. I use sklearn solution as it gives more divergence between associative groupings:
import numpy as np
np.random.seed(42)
num_samples = 100
M = 1000
sigma = 0.5
X = np.random.binomial(2, 0.4, (num_samples, M))
beta = np.zeros(M)
beta[5] = 1.0
y = X.dot(beta) + sigma*np.random.randn(num_samples)
"standardise y"
y = y - np.mean(y)
y = y/np.std(y)
"center and standardise X"
Xc = X - X.mean(axis=0)
xstd = X.std(axis=0)
mask = xstd > 1e-12
Xc = Xc[:, mask]
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(Xc ,y)
beta_hat_sklearn = lr.coef_
beta_hat_sklearn.T # Xc.T # Xc # beta_hat_sklearn / num_samples
"equivalent < Python3.5"
beta_hat_sklearn.T.dot(Xc.T).dot(Xc).dot(beta_hat_sklearn) / num_samples
# 1.0000000000000009
beta_hat_sklearn.T # (Xc.T # Xc) # beta_hat_sklearn / num_samples
"equivalent < Python3.5"
beta_hat_sklearn.T.dot(Xc.T.dot( Xc )).dot(beta_hat_sklearn )/ num_samples
# 0.89517439485479278
Update
It might be MacOSX specific bug.

Related

NumPyro: sampling active sites as Bernoulli RVs

I want to modify the following NumPyro model:
import jax.numpy as jnp
from jax import random, vmap
import numpy as np
import numpyro
import numpyro.distributions as dist
from numpyro.infer import MCMC, NUTS
numpyro.set_host_device_count(6)
def model(y=None, X=None):
n_predictors = X.shape[1]
with numpyro.plate('state', n_predictors):
theta = numpyro.sample('theta', dist.Gamma(concentration=1, rate=1/5000))
mu = jnp.dot(X, theta)
numpyro.sample('y', dist.Normal(loc=mu, scale=1), obs=y)
theta = np.zeros(5) # True parameters
theta[0] = 2
theta[1] = 3
X = np.random.randn(20, theta.size)**2 # Design matrix
y = X # theta + np.random.randn(X.shape[0]) # data
rng_key = random.PRNGKey(74674)
rng_key, rng_key_ = random.split(rng_key)
mcmc = MCMC(NUTS(model), num_warmup=500, num_samples=1000, num_chains=6)
mcmc.run(rng_key_, X=X, y=y)
mcmc.print_summary()
I want to include Bernoulli RVs z that choose which theta is active. Then I would like to make inference for these z. Basically, I am trying to do variable selection. The idea is illustrated in the following model (which fails):
def failed_model(y=None, X=None):
n_predictors = X.shape[1]
with numpyro.plate('state', n_predictors):
theta = numpyro.sample('theta', dist.Gamma(concentration=1, rate=1/5000))
z = numpyro.sample('z', dist.Bernoulli(0.1)
mu = jnp.dot(X, theta * z)
numpyro.sample('y', dist.Normal(loc=mu, scale=1), obs=y)
I tried to understand the second example from the [docs][1], but it does not show masking for a random array, but rather for a fixed array.
[1]: https://num.pyro.ai/en/stable/distributions.html

SKlearn Gaussian Process with constant, manually set correlation

I want to use the Gaussian Process approximation for a simple 1D test function to illustrate a few things. I want to iterate over a few different values for the correlation matrix (since this is 1D it is just a single value) and show what effect different values have on the approximation. My understanding is, that "theta" is the parameter for this. Therefore I want to set the theta value manually and don't want any optimization/changes to it. I thought the constant kernel and the clone_with_theta function might get me what I want but I didn't get it to work. Here is what I have so far:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as ConstantKernel
def f(x):
"""The function to predict."""
return x/2 + ((1/10 + x) * np.sin(5*x - 1))/(1 + x**2 * (np.sin(x - (1/2))**2))
# ----------------------------------------------------------------------
# Data Points
X = np.atleast_2d(np.delete(np.linspace(-1,1, 7),4)).T
y = f(X).ravel()
# Instantiate a Gaussian Process model
kernel = ConstantKernel(constant_value=1, constant_value_bounds='fixed')
theta = np.array([0.5,0.5])
kernel = kernel.clone_with_theta(theta)
gp = GaussianProcessRegressor(kernel=kernel, optimizer=None)
# Fit to data using Maximum Likelihood Estimation of the parameters
gp.fit(X, y)
# Make the prediction on the meshed x-axis (ask for MSE as well)
y_pred, sigma = gp.predict(x, return_std=True)
# Plot
# ...

I programmed a simple implementation myself now, which allows to set correlation (here 'b') manually:
import numpy as np
from numpy.linalg import inv
def f(x):
"""The function to predict."""
return x/2 + ((1/10 + x) * np.sin(5*x - 1))/(1 + x**2 * (np.sin(x - (1/2))**2))
def kriging_approx(x,xt,yt,b,mu,R_inv):
N = yt.size
one = np.matrix(np.ones((yt.size))).T
r = np.zeros((N))
for i in range(0,N):
r[i]= np.exp(-b * (xt[i]-x)**2)
y = mu + np.matmul(np.matmul(r.T,R_inv),yt - mu*one)
y = y[0,0]
return y
def calc_R (x,b):
N = x.size
# setup R
R = np.zeros((N,N))
for i in range(0,N):
for j in range(0,N):
R[i][j] = np.exp(-b * (x[i]-x[j])**2)
R_inv = inv(R)
return R, R_inv
def calc_mu_sig (yt, R_inv):
N = yt.size
one = np.matrix(np.ones((N))).T
mu = np.matmul(np.matmul(one.T,R_inv),yt) / np.matmul(np.matmul(one.T,R_inv),one)
mu = mu[0,0]
sig2 = (np.matmul(np.matmul((yt - mu*one).T,R_inv),yt - mu*one))/(N)
sig2 = sig2[0,0]
return mu, sig2
# ----------------------------------------------------------------------
# Data Points
xt = np.linspace(-1,1, 7)
yt = np.matrix((f(xt))).T
# Calc R
R, R_inv = calc_R(xt, b)
# Calc mu and sigma
mu_dach, sig_dach2 = calc_mu_sig(yt, R_inv)
# Point to get approximation for
x = 1
y_approx = kriging_approx(x, xt, yt, b, mu_dach, R_inv)

Why doesn't my custom made linear regression model match sklearn?

I'm attempting to create a simple linear model with Python using no libraries (other than numpy). Here's what I have
import numpy as np
import pandas
np.random.seed(1)
alpha = 0.1
def h(x, w):
return np.dot(w.T, x)
def cost(X, W, Y):
totalCost = 0
for i in range(47):
diff = h(X[i], W) - Y[i]
squared = diff * diff
totalCost += squared
return totalCost / 2
housing_data = np.loadtxt('Housing.csv', delimiter=',')
x1 = housing_data[:,0]
x2 = housing_data[:,1]
y = housing_data[:,2]
avgX1 = np.mean(x1)
stdX1 = np.std(x1)
normX1 = (x1 - avgX1) / stdX1
print('avgX1', avgX1)
print('stdX1', stdX1)
avgX2 = np.mean(x2)
stdX2 = np.std(x2)
normX2 = (x2 - avgX2) / stdX2
print('avgX2', avgX2)
print('stdX2', stdX2)
normalizedX = np.ones((47, 3))
normalizedX[:,1] = normX1
normalizedX[:,2] = normX2
np.savetxt('normalizedX.csv', normalizedX)
weights = np.ones((3,))
for boom in range(100):
currentCost = cost(normalizedX, weights, y)
if boom % 1 == 0:
print(boom, 'iteration', weights[0], weights[1], weights[2])
print('Cost', currentCost)
for i in range(47):
errorDiff = h(normalizedX[i], weights) - y[i]
weights[0] = weights[0] - alpha * (errorDiff) * normalizedX[i][0]
weights[1] = weights[1] - alpha * (errorDiff) * normalizedX[i][1]
weights[2] = weights[2] - alpha * (errorDiff) * normalizedX[i][2]
print(weights)
predictedX = [1, (2100 - avgX1) / stdX1, (3 - avgX2) / stdX2]
firstPrediction = np.array(predictedX)
print('firstPrediction', firstPrediction)
firstPrediction = h(firstPrediction, weights)
print(firstPrediction)
First, it converges VERY quickly. After only 14 iterations. Second, it gives me a different result than a linear regression with sklearn. For reference, my sklearn code is:
import numpy
import matplotlib.pyplot as plot
import pandas
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
dataset = pandas.read_csv('Housing.csv', header=None)
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 2].values
linearRegressor = LinearRegression()
xnorm = sklearn.preprocessing.scale(x)
scaleCoef = sklearn.preprocessing.StandardScaler().fit(x)
mean = scaleCoef.mean_
std = numpy.sqrt(scaleCoef.var_)
print('stf')
print(std)
stuff = linearRegressor.fit(xnorm, y)
predictedX = [[(2100 - mean[0]) / std[0], (3 - mean[1]) / std[1]]]
yPrediction = linearRegressor.predict(predictedX)
print('predictedX', predictedX)
print('predict', yPrediction)
print(stuff.coef_, stuff.intercept_)
My custom model predicts 337,000 for the value of y and sklearn predicts 355,000. My data is 47 rows that look like
2104,3,3.999e+05
1600,3,3.299e+05
2400,3,3.69e+05
1416,2,2.32e+05
3000,4,5.399e+05
1985,4,2.999e+05
1534,3,3.149e+05
Complete data available at https://github.com/shamoons/linear-logistic-regression/blob/master/Housing.csv
I assume either (a) my regression with gradient descent is somehow wrong or (b) I'm not using sklearn properly.
Any other reasons why the 2 wouldn't predict the same output for a given input?

I think you are missing the 1/m term (where m is the size of y) in the gradient descent. After including the 1/m term, I seem to get a predicted value similar to your sklearn code.
see below
....
weights = np.ones((3,))
m = y.size
for boom in range(100):
currentCost = cost(normalizedX, weights, y)
if boom % 1 == 0:
print(boom, 'iteration', weights[0], weights[1], weights[2])
print('Cost', currentCost)
for i in range(47):
errorDiff = h(normalizedX[i], weights) - y[i]
weights[0] = weights[0] - alpha *(1/m)* (errorDiff) * normalizedX[i][0]
weights[1] = weights[1] - alpha *(1/m)* (errorDiff) * normalizedX[i][1]
weights[2] = weights[2] - alpha *(1/m)* (errorDiff) * normalizedX[i][2]
...
this gives the firstprediction to be 355242.
This agrees well with the linear regression model even though it does not do gradient descent.
I also tried sgdregressor (uses stochastic gradient descent) in sklearn and it too seem to get a value close to linear regressor model and your model. see the code below
import numpy
import matplotlib.pyplot as plot
import pandas
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, SGDRegressor
dataset = pandas.read_csv('Housing.csv', header=None)
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 2].values
sgdRegressor = SGDRegressor(penalty='none', learning_rate='constant', eta0=0.1, max_iter=1000, tol = 1E-6)
xnorm = sklearn.preprocessing.scale(x)
scaleCoef = sklearn.preprocessing.StandardScaler().fit(x)
mean = scaleCoef.mean_
std = numpy.sqrt(scaleCoef.var_)
print('stf')
print(std)
yPrediction = []
predictedX = [[(2100 - mean[0]) / std[0], (3 - mean[1]) / std[1]]]
print('predictedX', predictedX)
for trials in range(10):
stuff = sgdRegressor.fit(xnorm, y)
yPrediction.extend(sgdRegressor.predict(predictedX))
print('predict', np.mean(yPrediction))
results in
predict 355533.10119985335

Normal Equation Implementation in Python / Numpy

I've written some beginner code to calculate the co-efficients of a simple linear model using the normal equation.
# Modules
import numpy as np
# Loading data set
X, y = np.loadtxt('ex1data3.txt', delimiter=',', unpack=True)
data = np.genfromtxt('ex1data3.txt', delimiter=',')
def normalEquation(X, y):
m = int(np.size(data[:, 1]))
# This is the feature / parameter (2x2) vector that will
# contain my minimized values
theta = []
# I create a bias_vector to add to my newly created X vector
bias_vector = np.ones((m, 1))
# I need to reshape my original X(m,) vector so that I can
# manipulate it with my bias_vector; they need to share the same
# dimensions.
X = np.reshape(X, (m, 1))
# I combine these two vectors together to get a (m, 2) matrix
X = np.append(bias_vector, X, axis=1)
# Normal Equation:
# theta = inv(X^T * X) * X^T * y
# For convenience I create a new, tranposed X matrix
X_transpose = np.transpose(X)
# Calculating theta
theta = np.linalg.inv(X_transpose.dot(X))
theta = theta.dot(X_transpose)
theta = theta.dot(y)
return theta
p = normalEquation(X, y)
print(p)
Using the small data set found here:
http://www.lauradhamilton.com/tutorial-linear-regression-with-octave
I get the co-efficients: [-0.34390603; 0.2124426 ] using the above code instead of: [24.9660; 3.3058]. Could anyone help clarify where I am going wrong?

You can implement normal equation like below:
import numpy as np
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)
X_b = np.c_[np.ones((100, 1)), X] # add x0 = 1 to each instance
theta_best = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
X_new = np.array([[0], [2]])
X_new_b = np.c_[np.ones((2, 1)), X_new] # add x0 = 1 to each instance
y_predict = X_new_b.dot(theta_best)
y_predict

This assumes X is an m by n+1 dimensional matrix where x_0 always = 1 and y is a m-dimensional vector.
import numpy as np
step1 = np.dot(X.T, X)
step2 = np.linalg.pinv(step1)
step3 = np.dot(step2, X.T)
theta = np.dot(step3, y) # if y is m x 1. If 1xm, then use y.T

Your implementation is correct. You've only swapped X and y (look closely how they define x and y), that's why you get a different result.
The call normalEquation(y, X) gives [ 24.96601443 3.30576144] as it should.

Here is the normal equation in one line:
theta = np.dot(np.linalg.inv(np.dot(X.T,X)),np.dot(X.T,Y))

Constrained regression in Python

I have this simple regression model:
y = a + b * x + c * z + error
with a constraint on parameters:
c = b - 1
There are similar questions posted on SO (like Constrained Linear Regression in Python). However, the constraints' type is lb <= parameter =< ub.
What are the available options to handle this specific constrained linear regression problem?

This is how it can be done using GLM:
import statsmodels
import statsmodels.api as sm
import numpy as np
# Set the link function to identity
statsmodels.genmod.families.links.identity()
OLS_from_GLM = sm.GLM(y, sm.add_constant(np.column_stack(x, z)))
'''Setting the restrictions on parameters in the form of (R, q), where R
and q are constraints' matrix and constraints' values, respectively. As
for the restriction in the aforementioned regression model, i.e.,
c = b - 1 or b - c = 1, R = [0, 1, -1] and q = 1.'''
res_OLS_from_GLM = OLS_from_GLM.fit_constrained(([0, 1.0, -1.0], 1))
print(res_OLS_from_GLM.summary())

There are a few constrained optimization packages in Python such as CVX, CASADI, GEKKO, Pyomo, and others that can solve the problem. I develop Gekko for linear, nonlinear, and mixed integer optimization problems with differential or algebraic constraints.
import numpy as np
from gekko import GEKKO
# Data
x = np.random.rand(10)
y = np.random.rand(10)
z = np.random.rand(10)
# Gekko for constrained regression
m = GEKKO(remote=False); m.options.IMODE=2
a,b,c = m.Array(m.FV,3)
a.STATUS=1; b.STATUS=1; c.STATUS=1
x=m.Param(x); z=m.Param(z)
y = m.Var(); ym=m.Param(y)
m.Equation(y==a+b*x+c*z)
m.Equation(c==b-1)
m.Minimize((ym-y)**2)
m.options.SOLVER=1
m.solve(disp=True)
print(a.value[0],b.value[0],c.value[0])
This gives the solution that may be different when you run it because it uses random values for the data.
-0.021514129645 0.45830726553 -0.54169273447
The constraint c = b - 1 is satisfied with -0.54169273447 = 0.45830726553 - 1. Here is a comparison to other linear regression packages in Python with an without constraints:
import numpy as np
from scipy.stats import linregress
import statsmodels.api as sm
import matplotlib.pyplot as plt
from gekko import GEKKO
# Data
x = np.array([4,5,2,3,-1,1,6,7])
y = np.array([0.3,0.8,-0.05,0.1,-0.8,-0.5,0.5,0.65])
# calculate R^2
def rsq(y1,y2):
yresid= y1 - y2
SSresid = np.sum(yresid**2)
SStotal = len(y1) * np.var(y1)
r2 = 1 - SSresid/SStotal
return r2
# Method 1: scipy linregress
slope,intercept,r,p_value,std_err = linregress(x,y)
a = [slope,intercept]
print('R^2 linregress = '+str(r**2))
# Method 2: numpy polyfit (1=linear)
a = np.polyfit(x,y,1); print(a)
yfit = np.polyval(a,x)
print('R^2 polyfit = '+str(rsq(y,yfit)))
# Method 3: numpy linalg solution
# y = X a
# X^T y = X^T X a
X = np.vstack((x,np.ones(len(x)))).T
# matrix operations
XX = np.dot(X.T,X)
XTy = np.dot(X.T,y)
a = np.linalg.solve(XX,XTy)
# same solution with lstsq
a = np.linalg.lstsq(X,y,rcond=None)[0]
yfit = a[0]*x+a[1]; print(a)
print('R^2 matrix = '+str(rsq(y,yfit)))
# Method 4: statsmodels ordinary least squares
X = sm.add_constant(x,prepend=False)
model = sm.OLS(y,X).fit()
yfit = model.predict(X)
a = model.params
print(model.summary())
# Method 5: Gekko for constrained regression
m = GEKKO(remote=False); m.options.IMODE=2
c = m.Array(m.FV,2); c[0].STATUS=1; c[1].STATUS=1
c[1].lower=-0.5
xd = m.Param(x); yd = m.Param(y); yp = m.Var()
m.Equation(yp==c[0]*xd+c[1])
m.Minimize((yd-yp)**2)
m.solve(disp=False)
c = [c[0].value[0],c[1].value[1]]
print(c)
# plot data and regressed line
plt.plot(x,y,'ko',label='data')
xp = np.linspace(-2,8,100)
slope = str(np.round(a[0],2))
intercept = str(np.round(a[1],2))
eqn = 'LstSQ: y='+slope+'x'+intercept
plt.plot(xp,a[0]*xp+a[1],'r-',label=eqn)
slope = str(np.round(c[0],2))
intercept = str(np.round(c[1],2))
eqn = 'Constraint: y='+slope+'x'+intercept
plt.plot(xp,c[0]*xp+c[1],'b--',label=eqn)
plt.grid()
plt.legend()
plt.show()

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

associativity of matrix multiplication in numpy - python

Related

NumPyro: sampling active sites as Bernoulli RVs

SKlearn Gaussian Process with constant, manually set correlation

Why doesn't my custom made linear regression model match sklearn?

Normal Equation Implementation in Python / Numpy

Constrained regression in Python

Categories

Resources