Strange image of a polynomial function - python

I'm having trouble plotting a polynomial with confidence bounds. Here's my code:
import matplotlib.pyplot as plt
import numpy as np
X = np.array([-5.965215369881319, -40.41538208207736, -15.584956679988448, -6.073510488327594, -11.784890508714675, -7.754674907524617, -17.482364474520395, 2.4347468437246667, -16.133111795228572, -15.815302670890363, 5.9730059659614305, -19.249139823392717, 4.044936045002517, -7.102200416593474, 5.035187269390729, -23.543269648523623, -12.593308808761405, -21.08859785268947, -24.712871361819676, 2.4347468437246667, -21.028901691001877, -15.815302670890363, 7.208054914877421, -29.6589088548177])
Y = np.array([-2.6822693448184607, -23.168555618313547, -3.6166894384329806, -3.5137320916685866, -3.770179381108618, -12.788411352407874, -15.698803377485447, 1.9978332067376703, -11.838042662997829, -8.377671546754629, 8.109573809406804, -14.749849913813343, 2.8160696371542833, -3.3810722874645083, 5.560322978176329, -16.710386872172883, -6.795050134412731, -9.855604995547115, -25.386715163603533, 1.9978332067376703, -11.828949808296766, -8.402106796338003, 7.631911984593458, -18.155638519731614])
#
plt.plot(X, Y, '.')
poly_degree = 5
sd_cutoff = 1 # 2 keeps everything
coeffs = np.polyfit(X, Y, poly_degree)
poly_eqn = np.poly1d(coeffs)
Y_hat = poly_eqn(X)
delta = Y - Y_hat
sd_p = np.std(delta)
ok = abs(delta) < sd_p * sd_cutoff
plt.scatter(X, Y, color=np.where(ok, 'k', 'r'))
plt.fill_between(
    X,
    Y_hat - sd_p * sd_cutoff,
    Y_hat + sd_p * sd_cutoff,
    color='#00000020')
plt.plot(X, Y_hat)
Why does my polynomial look so strange?
https://imgur.com/a/hf4gY3P

Since the linked question does not provide a sorting solution, here is the code that will sort the X-Y pairs:
...
ind = np.argsort(X)
X = X[ind]
Y = Y[ind]
...
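For clarity, a minimal sketch of the whole fix with the same variable names as in the question. Note that the polynomial fit itself does not depend on the ordering; only plt.plot and plt.fill_between do, because they connect points in the order they are given:
import numpy as np
import matplotlib.pyplot as plt

# sort the data, then refit and redraw the curve and the confidence band
ind = np.argsort(X)
X, Y = X[ind], Y[ind]
coeffs = np.polyfit(X, Y, poly_degree)
poly_eqn = np.poly1d(coeffs)
Y_hat = poly_eqn(X)
sd_p = np.std(Y - Y_hat)
plt.plot(X, Y, '.')
plt.plot(X, Y_hat)
plt.fill_between(X, Y_hat - sd_p * sd_cutoff, Y_hat + sd_p * sd_cutoff, color='#00000020')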
Output: [plot not included]

Related

Runtime error fitting a binary 2d function using python

I am trying to fit a function to extract parameters from a binary 2D grating in Python.
Here is my code, which runs but does not deliver a proper output:
import numpy as np
import pylab as plt
from scipy.optimize import curve_fit

def grid(X, Y, P, FS):
    """
    Calculate Z(X, Y) of a binary grating with period P and feature size FS.
    input:
        X, Y (np.array): from numpy meshgrid, the domain of the function
        P (float, int): period of the grating
        FS (float, int): size of the grating features
    output:
        Z (np.array): binary height profile of the grating containing 0 and 1,
        same shape as X and Y
    """
    Z = np.ones_like(X)
    Z[X % P > FS] = 0
    Z[Y % P > FS] = 0
    return Z

# domain of the example
x = np.arange(0, 500)
y = np.arange(0, 500)
X, Y = np.meshgrid(x, y)

# plot of the example grating
Z = grid(X, Y, 93, 42)
plt.contourf(X, Y, Z)
plt.show()
# here starts the fit
# np.ravel is used in combination with scipy.optimize.curve_fit as in every example I found online
# goal: find the values of P and FS used to generate Z
xdata = np.vstack((X.ravel(), Y.ravel()))
ydata = Z.ravel()

def _grid(xdata, P, FS):
    """
    Helper function to call grid(X, Y, P, FS) with the flattened input used
    for curve_fit; returns Z flattened in the same way.
    """
    # unpack x, y and generate the meshgrid
    x, y = xdata
    x = np.unique(x)
    y = np.unique(y)
    X, Y = np.meshgrid(x, y)
    # call the original function and return the flattened result
    res = grid(X, Y, P, FS)
    return res.ravel()

# try to fit the parameters
popt, pcov = curve_fit(_grid, xdata, ydata, p0=[90, 40])
print(popt)
print(pcov)
Does anyone spot the problem? Or is there a better way or programming language to do this simple fit?
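One hedged guess (an assumption, not a confirmed diagnosis): grid() is piecewise constant in P and FS, so the least-squares residual that curve_fit minimizes has zero gradient almost everywhere, and the gradient-based optimizer never moves away from p0. Under that assumption, a coarse brute-force scan over integer candidates would recover the parameters for this example:
import numpy as np

# brute-force scan (a sketch, assuming grid(), X, Y and Z from the code above):
# evaluate the squared error for candidate (P, FS) pairs and keep the best one
best_err, best_P, best_FS = np.inf, None, None
for P_try in range(80, 101):
    for FS_try in range(30, 51):
        err = np.sum((grid(X, Y, P_try, FS_try) - Z) ** 2)
        if err < best_err:
            best_err, best_P, best_FS = err, P_try, FS_try
print(best_P, best_FS)  # should recover 93 and 42 for the example grating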

3D Gaussian Process Regression

I am very new to Gaussian processes and to Python as well.
I am trying to produce a very simple Gaussian process regression for a 3D model.
I have a very simple Python code for a function:
import numpy as np

def exponential_cov(x, y, params):
    return params[0] * np.exp(-0.5 * params[1] * np.subtract.outer(x, y)**2)

def conditional(x_new, x, y, params):
    B = exponential_cov(x_new, x, params)
    C = exponential_cov(x, x, params)
    A = exponential_cov(x_new, x_new, params)
    mu = np.linalg.inv(C).dot(B.T).T.dot(y)
    sigma = A - B.dot(np.linalg.inv(C).dot(B.T))
    return (mu.squeeze(), sigma.squeeze())

import matplotlib.pylab as plt

# GP PRIOR
tu = [1, 10]
Si_tu = exponential_cov(0, 0, tu)
xpts = np.arange(-5, 5, step=0.01)
plt.errorbar(xpts, np.zeros(len(xpts)), yerr=Si_tu, capsize=0, color='#95daed', alpha=0.5, label='error')  # error
plt.plot(xpts, np.zeros(len(xpts)), linestyle='dashed', color='#3105b2', linewidth=2.5, label='mu')  # mu

# GP FOR 1ST POINT
x = [1.]
y = np.sin(x) + np.cos(np.sqrt(15) * x)
Si_1 = exponential_cov(x, x, tu)

def predict(x, data, kernel, params, sigma, t):
    k = [kernel(x, y, params) for y in data]
    Sinv = np.linalg.inv(sigma)
    y_pred = np.dot(k, Sinv).dot(t)
    sigma_new = kernel(x, x, params) - np.dot(k, Sinv).dot(k)
    return y_pred, sigma_new

x_pred = np.linspace(-5, 5, 1000)  # change step here!!
print("x_pred =")
print(x_pred)
predictions = [predict(i, x, exponential_cov, tu, Si_1, y) for i in x_pred]
y_pred, sigmas = np.transpose(predictions)
print("y_pred =")
print(y_pred)
print("sigmas =")
print(sigmas)

# GP FOR 2ND POINT
m, s = conditional([-1], x, y, tu)
y2 = np.sin(-1) + np.cos(np.sqrt(15) * (-1))
x.append(-1)
y = np.append(y, y2)
Si_2 = exponential_cov(x, x, tu)
predictions = [predict(i, x, exponential_cov, tu, Si_2, y) for i in x_pred]
y_pred, sigmas = np.transpose(predictions)
print("y_pred =")
print(y_pred)
print("sigmas =")
print(sigmas)
Using this code I get very nice fitting results for the function np.sin(x) + np.cos(np.sqrt(15) * x), but what I really want to do is try the same Gaussian process for the function Z = np.sin(2*X) * np.cos(2*Y) / 2.
I know that the idea is basically the same, but I cannot adapt my Python code to the [x, y] input needed to obtain z.
I would really appreciate your help, hints, or links!
In your current code the input of the function is 1-D, while the new function is 2-D, so you have to change the covariance function. For example, use an ARD-based kernel (see a kernel cookbook for options). You can also use an isotropic kernel for 2-D input; just make sure you pick a suitable distance function (e.g. the L2 norm) and a single lengthscale.
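For example, a minimal sketch of an isotropic squared-exponential kernel for 2-D inputs, mirroring the 1-D exponential_cov above (the points are rows of (n, 2) arrays, and params plays the same role as tu):
import numpy as np

def exponential_cov_2d(A, B, params):
    # A: (n, 2) array, B: (m, 2) array -> (n, m) covariance matrix
    A = np.atleast_2d(A)
    B = np.atleast_2d(B)
    # squared Euclidean (L2) distance between every pair of points
    sq_dists = np.sum((A[:, None, :] - B[None, :, :]) ** 2, axis=-1)
    return params[0] * np.exp(-0.5 * params[1] * sq_dists)

# usage sketch: covariance between two training points and one test point
K = exponential_cov_2d(np.array([[1.0, 2.0], [0.0, 0.0]]), np.array([[1.0, 2.0]]), [1, 10])
print(K.shape)  # (2, 1)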

lambda: what is the output when a lambda function is applied to a numpy array?

I am learning ML with Python, and I came across the code below in a book.
x, y = np.array(x), np.array(y)
x = (x - x.mean()) / x.std()
x0 = np.linspace(-2, 4, 100)

def get_model(deg):
    return lambda input_x=x0: np.polyval(np.polyfit(x, y, deg), input_x)

def get_cost(deg, input_x, input_y):
    return 0.5 * ((get_model(deg)(input_x) - input_y) ** 2).sum()
I'm not sure why, in the get_cost function, the author applies get_model(deg) to input_x (which is x). In my understanding, get_model(deg) already returns the predicted y based on x0.
When I tried to see what was happening, I typed get_model(4), and it returned <function __main__.get_model.<locals>.<lambda>>. To my surprise, it didn't return the predicted y based on x0 but a function?! I was totally confused.
When I typed get_model(4)(x), it returned the predicted y based on x. I don't get it. Could someone please help me figure this out?
The call get_model(deg) does not, as you noticed, return predictions, but a model (a function) for predicting.
If you execute get_model(1), it returns a linear model, which lets you fit your values with a linear function:
import numpy as np
import matplotlib.pyplot as plt

fig = plt.gcf()
fig.set_size_inches(10, 5)

x = np.linspace(-2, 4, 200)
y = x**2
y += np.random.rand(len(x)) * 10
x0 = x

def get_model(deg):
    return lambda input_x=x0: np.polyval(np.polyfit(x, y, deg), input_x)

linear_model = get_model(1)
plt.scatter(x, y)
plt.scatter(x, linear_model(), c='red')
plt.show()
If you want to try another model, you can do this by changing the degree of the model:
plt.scatter(x, y)
plt.scatter(x, get_model(2)(), c='red')
plt.scatter(x, get_model(19)(), c='yellow')
plt.show()
I hope this helps you understand the code a bit better.

Regularised Logistic regression in Python

I am using the code below for logistic regression with regularization in Python. It's giving me 80% accuracy on the training set.
I am using the minimize method 'TNC'. With BFGS the results are around 50%.
What is the ideal method (equivalent to fminunc in Octave) to use for gradient descent?
How can I increase or decrease the number of iterations?
What is the default number of iterations?
Any other suggestion/approach to improve performance?
The same algorithm in Octave with fminunc gives 83% accuracy on the training set.
import numpy as np
import scipy.optimize as op
from sklearn import preprocessing
import matplotlib.pyplot as plt
from matplotlib import style
from pylab import scatter, show, legend, xlabel, ylabel
from numpy import loadtxt, where
from sklearn.preprocessing import PolynomialFeatures

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def Gradient(theta, X, y, l):
    m, n = X.shape
    #print("theta shape")
    #print(theta.shape)
    theta = theta.reshape((n, 1))
    thetaR = theta[1:n, :]
    y = y.reshape((m, 1))
    h = sigmoid(X.dot(theta))
    nonRegGrad = ((np.sum(((h - y) * X), axis=0)) / m).reshape(n, 1)
    reg = np.insert((l / m) * thetaR, 0, 0, axis=0)
    grad = nonRegGrad + reg
    return grad.flatten()

def CostFunc(theta, X, y, l):
    h = sigmoid(X.dot(theta))
    m, n = X.shape
    #print("theta shape")
    #print(theta.shape)
    theta = theta.reshape((n, 1))
    thetaR = theta[1:n, :]
    cost = np.sum((np.multiply(-y, np.log(h)) - np.multiply((1 - y), np.log(1 - h)))) / m
    reg = (l / (2 * m)) * np.sum(np.square(thetaR))
    J = cost + reg
    return J

def predict(theta, X):
    m, n = X.shape
    return np.round(sigmoid(X.dot(theta.reshape(n, 1))))

data = np.loadtxt(open("ex2data2.txt", "rb"), delimiter=",", skiprows=1)
nr, nc = data.shape
X = data[:, 0:nc - 1]
#X = preprocessing.scale(X)
#X = np.insert(X, 0, 1, axis=1)
y = data[:, [nc - 1]]

pos = where(y == 1)
neg = where(y == 0)
scatter(X[pos, 0], X[pos, 1], marker='o', c='b')
scatter(X[neg, 0], X[neg, 1], marker='x', c='r')
xlabel('Microchip Test 1')
ylabel('Microchip Test 2')
legend(['Passed', 'Failed'])
show()

storeX = X
poly = PolynomialFeatures(6)
X = poly.fit_transform(X)
#print(X.shape)
m, n = X.shape
initial_theta = np.zeros((n, 1))
#initial_theta = zeros(shape=(it.shape[1], 1))
l = 1

# Compute and display initial cost and gradient for regularized logistic
# regression
#cost, grad = cost_function_reg(initial_theta, X, y, l)
#def decorated_cost(theta):
#    return cost_function_reg(theta, X, y, l)
#print fmin_bfgs(decorated_cost, initial_theta, maxfun=400)

print("Calling optimization")
Result = op.minimize(fun=CostFunc,
                     x0=initial_theta,
                     args=(X, y, l),
                     method='TNC',
                     jac=Gradient)
optimal_theta = Result.x
print(Result.x.shape)
print("optimal theta")
print(optimal_theta)
p = predict(optimal_theta, X)
accuracy = np.mean(np.double(p == y))
print("accuracy")
print(accuracy)
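On the iteration questions: scipy.optimize.minimize takes an options dict, so a minimal sketch (keeping the names used above; 400 is just an example cap) would be:
Result = op.minimize(fun=CostFunc,
                     x0=initial_theta,
                     args=(X, y, l),
                     method='TNC',
                     jac=Gradient,
                     # TNC limits function evaluations via 'maxfun';
                     # gradient-based methods such as 'BFGS' use 'maxiter' instead
                     options={'maxfun': 400})
print(Result.nfev, Result.message)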

How to make scipy.interpolate give an extrapolated result beyond the input range?

I'm trying to port a program which uses a hand-rolled interpolator (developed by a mathematician colleague) over to use the interpolators provided by scipy. I'd like to use or wrap the scipy interpolator so that its behavior is as close as possible to the old interpolator.
A key difference between the two functions is that if the input value is above or below the input range, our original interpolator will extrapolate the result. If you try this with the scipy interpolator, it raises a ValueError. Consider this program as an example:
import numpy as np
from scipy import interpolate
x = np.arange(0,10)
y = np.exp(-x/3.0)
f = interpolate.interp1d(x, y)
print(f(9))
print(f(11))  # Causes ValueError, because it's greater than max(x)
Is there a sensible way to make it so that, instead of crashing, the final line simply does a linear extrapolation, continuing the gradients defined by the first and last two points to infinity?
Note that in the real software I'm not actually using the exp function - that's here for illustration only!
As of SciPy version 0.17.0, there is a new option for scipy.interpolate.interp1d that allows extrapolation. Simply set fill_value='extrapolate' in the call. Modifying your code in this way gives:
import numpy as np
from scipy import interpolate
x = np.arange(0,10)
y = np.exp(-x/3.0)
f = interpolate.interp1d(x, y, fill_value='extrapolate')
print(f(9))
print(f(11))
and the output is:
0.0497870683679
0.010394302658
You can take a look at InterpolatedUnivariateSpline.
Here is an example using it:
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import InterpolatedUnivariateSpline
# given values
xi = np.array([0.2, 0.5, 0.7, 0.9])
yi = np.array([0.3, -0.1, 0.2, 0.1])
# positions to inter/extrapolate
x = np.linspace(0, 1, 50)
# spline order: 1 linear, 2 quadratic, 3 cubic ...
order = 1
# do inter/extrapolation
s = InterpolatedUnivariateSpline(xi, yi, k=order)
y = s(x)
# example showing the interpolation for linear, quadratic and cubic interpolation
plt.figure()
plt.plot(xi, yi)
for order in range(1, 4):
    s = InterpolatedUnivariateSpline(xi, yi, k=order)
    y = s(x)
    plt.plot(x, y)
plt.show()
1. Constant extrapolation
You can use the interp function from scipy; it holds the left and right values constant beyond the range:
>>> from scipy import interp, arange, exp
>>> x = arange(0,10)
>>> y = exp(-x/3.0)
>>> interp([9,10], x, y)
array([ 0.04978707, 0.04978707])
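np.interp gives the same result; the top-level scipy.interp shown above is simply NumPy's interp re-exported:
>>> import numpy as np
>>> np.interp([9, 10], x, y)
array([ 0.04978707, 0.04978707])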
2. Linear (or other custom) extrapolation
You can write a wrapper around an interpolation function which takes care of linear extrapolation. For example:
from scipy.interpolate import interp1d
from scipy import arange, array, exp

def extrap1d(interpolator):
    xs = interpolator.x
    ys = interpolator.y

    def pointwise(x):
        if x < xs[0]:
            return ys[0] + (x - xs[0]) * (ys[1] - ys[0]) / (xs[1] - xs[0])
        elif x > xs[-1]:
            return ys[-1] + (x - xs[-1]) * (ys[-1] - ys[-2]) / (xs[-1] - xs[-2])
        else:
            return interpolator(x)

    def ufunclike(xs):
        return array(list(map(pointwise, array(xs))))

    return ufunclike
extrap1d takes an interpolation function and returns a function which can also extrapolate. And you can use it like this:
x = arange(0, 10)
y = exp(-x/3.0)
f_i = interp1d(x, y)
f_x = extrap1d(f_i)
print(f_x([9, 10]))
Output:
[ 0.04978707 0.03009069]
What about scipy.interpolate.splrep (with degree 1 and no smoothing):
>>> tck = scipy.interpolate.splrep([1, 2, 3, 4, 5], [1, 4, 9, 16, 25], k=1, s=0)
>>> scipy.interpolate.splev(6, tck)
34.0
It seems to do what you want, since 34 = 25 + (25 - 16).
Here's an alternative method that uses only the numpy package. It takes advantage of numpy's array functions, so may be faster when interpolating/extrapolating large arrays:
import numpy as np

def extrap(x, xp, yp):
    """np.interp function with linear extrapolation"""
    y = np.interp(x, xp, yp)
    y = np.where(x < xp[0], yp[0] + (x - xp[0]) * (yp[0] - yp[1]) / (xp[0] - xp[1]), y)
    y = np.where(x > xp[-1], yp[-1] + (x - xp[-1]) * (yp[-1] - yp[-2]) / (xp[-1] - xp[-2]), y)
    return y
x = np.arange(0, 10)
y = np.exp(-x/3.0)
xtest = np.array((8.5, 9.5))
print(np.exp(-xtest/3.0))
print(np.interp(xtest, x, y))
print(extrap(xtest, x, y))
Edit: Mark Mikofski's suggested modification of the "extrap" function:
def extrap(x, xp, yp):
    """np.interp function with linear extrapolation"""
    y = np.interp(x, xp, yp)
    y[x < xp[0]] = yp[0] + (x[x < xp[0]] - xp[0]) * (yp[0] - yp[1]) / (xp[0] - xp[1])
    y[x > xp[-1]] = yp[-1] + (x[x > xp[-1]] - xp[-1]) * (yp[-1] - yp[-2]) / (xp[-1] - xp[-2])
    return y
It may be faster to use boolean indexing with large datasets, since the algorithm above checks whether every point is outside the interval one at a time, whereas boolean indexing allows an easier and faster comparison.
For example:
# Necessary modules
import numpy as np
from scipy.interpolate import interp1d
# Original data
x = np.arange(0,10)
y = np.exp(-x/3.0)
# Interpolator class
f = interp1d(x, y)
# Output range (quite large)
xo = np.arange(0, 10, 0.001)
# Boolean indexing approach
# Generate an empty output array for "y" values
yo = np.empty_like(xo)
# Values lower than the minimum "x" are extrapolated at the same time
low = xo < f.x[0]
yo[low] = f.y[0] + (xo[low]-f.x[0])*(f.y[1]-f.y[0])/(f.x[1]-f.x[0])
# Values higher than the maximum "x" are extrapolated at same time
high = xo > f.x[-1]
yo[high] = f.y[-1] + (xo[high]-f.x[-1])*(f.y[-1]-f.y[-2])/(f.x[-1]-f.x[-2])
# Values inside the interpolation range are interpolated directly
inside = np.logical_and(xo >= f.x[0], xo <= f.x[-1])
yo[inside] = f(xo[inside])
In my case, with a data set of 300,000 points, this means a speed-up from 25.8 to 0.094 seconds; that is more than 250 times faster.
I did it by adding a point to my initial arrays. In this way I avoid defining my own functions, and the linear extrapolation (in the example below: right extrapolation) looks OK.
import numpy as np
from scipy import interp as itp

xnew = np.linspace(0, 1, 51)
x1 = xold[-2]
x2 = xold[-1]
y1 = yold[-2]
y2 = yold[-1]
right_val = y1 + (xnew[-1] - x1) * (y2 - y1) / (x2 - x1)
x = np.append(xold, xnew[-1])
y = np.append(yold, right_val)
f = itp(xnew, x, y)
I don't have enough reputation to comment, but in case somebody is looking for an extrapolation wrapper for a linear 2d-interpolation with scipy, I have adapted the answer that was given here for the 1d interpolation.
def extrap2d(interpolator):
    xs = interpolator.x
    ys = interpolator.y
    zs = interpolator.z
    zs = np.reshape(zs, (-1, len(xs)))

    def pointwise(x, y):
        if x < xs[0] or y < ys[0]:
            x1_index = np.argmin(np.abs(xs - x))
            x2_index = x1_index + 1
            y1_index = np.argmin(np.abs(ys - y))
            y2_index = y1_index + 1
            x1 = xs[x1_index]
            x2 = xs[x2_index]
            y1 = ys[y1_index]
            y2 = ys[y2_index]
            z11 = zs[x1_index, y1_index]
            z12 = zs[x1_index, y2_index]
            z21 = zs[x2_index, y1_index]
            z22 = zs[x2_index, y2_index]
            return (z11 * (x2 - x) * (y2 - y) +
                    z21 * (x - x1) * (y2 - y) +
                    z12 * (x2 - x) * (y - y1) +
                    z22 * (x - x1) * (y - y1)
                    ) / ((x2 - x1) * (y2 - y1) + 0.0)
        elif x > xs[-1] or y > ys[-1]:
            x1_index = np.argmin(np.abs(xs - x))
            x2_index = x1_index - 1
            y1_index = np.argmin(np.abs(ys - y))
            y2_index = y1_index - 1
            x1 = xs[x1_index]
            x2 = xs[x2_index]
            y1 = ys[y1_index]
            y2 = ys[y2_index]
            z11 = zs[x1_index, y1_index]
            z12 = zs[x1_index, y2_index]
            z21 = zs[x2_index, y1_index]
            z22 = zs[x2_index, y2_index]
            return (z11 * (x2 - x) * (y2 - y) +
                    z21 * (x - x1) * (y2 - y) +
                    z12 * (x2 - x) * (y - y1) +
                    z22 * (x - x1) * (y - y1)
                    ) / ((x2 - x1) * (y2 - y1) + 0.0)
        else:
            return interpolator(x, y)

    def ufunclike(xs, ys):
        if isinstance(xs, int) or isinstance(ys, int) or isinstance(xs, np.int32) or isinstance(ys, np.int32):
            res_array = pointwise(xs, ys)
        else:
            res_array = np.zeros((len(xs), len(ys)))
            for x_c in range(len(xs)):
                res_array[x_c, :] = np.array([pointwise(xs[x_c], ys[y_c]) for y_c in range(len(ys))]).T
        return res_array

    return ufunclike
I haven't commented much, and I am aware that the code isn't super clean. If anybody sees any errors, please let me know. In my current use case it is working without a problem :)
I'm afraid there is no easy way to do this in SciPy, to my knowledge. You can, as I'm fairly sure you are aware, turn off the bounds errors and fill all function values beyond the range with a constant, but that doesn't really help. See this question on the mailing list for some more ideas. Maybe you could use some kind of piecewise function, but that seems like a major pain.
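For reference, a minimal sketch of that constant-fill option; since SciPy 0.17 fill_value also accepts a (below, above) tuple:
import numpy as np
from scipy.interpolate import interp1d

x = np.arange(0, 10)
y = np.exp(-x/3.0)
# out-of-range queries return the end values instead of raising a ValueError
f = interp1d(x, y, bounds_error=False, fill_value=(y[0], y[-1]))
print(f(11))  # returns the last y value, not an exception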
The code below gives you a simple extrapolation function. k is the x value at which y has to be extrapolated, based on the data sets x and y. The numpy module is required.
def extrapol(k, x, y):
    xm = np.mean(x)
    ym = np.mean(y)
    sumnr = 0
    sumdr = 0
    length = len(x)
    for i in range(0, length):
        sumnr = sumnr + ((x[i] - xm) * (y[i] - ym))
        sumdr = sumdr + ((x[i] - xm) * (x[i] - xm))
    m = sumnr / sumdr
    c = ym - (m * xm)
    return (m * k) + c
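Note that this fits a single least-squares line (slope m, intercept c) through all of the data rather than extending only the end segment. A quick usage sketch with the arrays from the question:
import numpy as np

x = np.arange(0, 10)
y = np.exp(-x/3.0)
print(extrapol(11, x, y))  # value of the global least-squares line at x = 11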
Standard interpolate + linear extrapolate:
def interpola(v, x, y):
    if v <= x[0]:
        return y[0] + (y[1] - y[0]) / (x[1] - x[0]) * (v - x[0])
    elif v >= x[-1]:
        return y[-2] + (y[-1] - y[-2]) / (x[-1] - x[-2]) * (v - x[-2])
    else:
        f = interp1d(x, y, kind='cubic')
        return f(v)
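A quick usage sketch with the arrays from the question (interp1d comes from scipy.interpolate):
import numpy as np
from scipy.interpolate import interp1d

x = np.arange(0, 10)
y = np.exp(-x/3.0)
print(interpola(4.5, x, y))  # cubic interpolation inside the range
print(interpola(11, x, y))   # linear extrapolation from the last two points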
