I am trying to implement polynomial regression with gradient descent. The function I want to fit is f(x) = (1/3)x^3 - 2x^2 + 2x + 2 (the same function used in create_data below).
The code I use is:
import numpy as np
import matplotlib.pyplot as plt
import scipy.linalg
from sklearn.preprocessing import PolynomialFeatures

np.random.seed(seed=42)

def create_data():
    x = PolynomialFeatures(degree=5).fit_transform(np.linspace(-10,10,100).reshape(100,-1))
    l = lambda x_i: (1/3)*x_i**3-2*x_i**2+2*x_i+2
    data = l(x[:,1])
    noise = np.random.normal(0,0.1,size=np.shape(data))
    y = data+noise
    y = y.reshape(100,1)
    return {'x':x,'y':y}

def plot_function(x,y):
    fig = plt.figure(figsize=(10,10))
    plt.plot(x[:,1],[(1/3)*x_i**3-2*x_i**2+2*x_i+2 for x_i in x[:,1]],c='lightgreen',linewidth=3,zorder=0)
    plt.scatter(x[:,1],y)
    plt.show()

def w_update(y,x,batch,w_old,eta):
    derivative = np.sum([(y[i]-np.dot(w_old.T,x[i,:]))*x[i,:] for i in range(np.shape(x)[0])])
    print(derivative)
    return w_old+eta*(1/batch)*derivative
# initialize variables
w = np.random.normal(size=(6,1))
data = create_data()
x = data['x']
y = data['y']
plot_function(x,y)

# Update w
w_s = []
Error = []
for i in range(500):
    error = (1/2)*np.sum([(y[i]-np.dot(w.T,x[i,:]))**2 for i in range(len(x))])
    Error.append(error)
    w_prime = w_update(y,x,np.shape(x)[0],w,0.001)
    w = w_prime
    w_s.append(w)

# Plot the predicted function
plt.plot(x[:,1],np.dot(x,w))
plt.show()

# Plot the error
fig3 = plt.figure()
plt.scatter(range(len(Error[10:])),Error[10:])
plt.show()
But as a result I receive something strange which is completely out of bounds... I have also tried altering the number of iterations as well as the learning rate eta, but it did not help. I assume I have made a mistake in the update of w.
I have found the solution. The problem is indeed in the part where I calculate the weights, specifically in:
np.sum([(y[d]-np.dot(w_old.T,x[d,:]))*x[d,:] for d in range(np.shape(x)[0])])
which should instead be:
np.sum([-(y[d]-np.dot(w.T.copy(),x[d,:]))*x[d,:].reshape(np.shape(w)) for d in range(len(x))],axis=0)
We have to add np.sum(..., axis=0) to get the dimensionality we want; the result must have the same dimensionality as w. The numpy sum documentation says:
The default, axis=None, will sum all of the elements of the input
array.
This is not what we want to achieve. Adding axis=0 sums over the first axis of our array, which has shape (100, 7, 1): the 100 elements of shape (7, 1) are summed up and the resulting array has shape (7, 1), which is exactly the shape of w.
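A quick shape check (a minimal standalone sketch, not part of the original code) makes the difference visible:

import numpy as np

grads = np.ones((100, 7, 1))          # stand-in for 100 per-sample gradients, each of shape (7, 1)
print(np.sum(grads).shape)            # () -- axis=None collapses everything to a scalar
print(np.sum(grads, axis=0).shape)    # (7, 1) -- matches the shape of w

Implementing this and cleaning up the code yields: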
import numpy as np
import matplotlib.pyplot as plt
import scipy.linalg
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import MinMaxScaler

np.random.seed(seed=42)

def create_data():
    x = PolynomialFeatures(degree=6).fit_transform(np.linspace(-2,2,100).reshape(100,-1))
    # Normalize Data
    x[:,1:] = MinMaxScaler(feature_range=(-2,2),copy=False).fit_transform(x[:,1:])
    l = lambda x_i: np.cos(0.8*np.pi*x_i)
    data = l(x[:,1])
    noise = np.random.normal(0,0.1,size=np.shape(data))
    y = data+noise
    y = y.reshape(100,1)
    return {'x':x,'y':y}

def plot_function(x,y,w,Error,w_s):
    fig,ax = plt.subplots(nrows=1,ncols=2,figsize=(40,10))
    ax[0].plot(x[:,1],[np.cos(0.8*np.pi*x_i) for x_i in x[:,1]],c='lightgreen',linewidth=3,zorder=0)
    ax[0].scatter(x[:,1],y)
    ax[0].plot(x[:,1],np.dot(x,w))
    ax[0].set_title('Function')
    ax[1].scatter(range(iterations),Error)
    ax[1].set_title('Error')
    plt.show()

# initialize variables
data = create_data()
x = data['x']
y = data['y']
w = np.random.normal(size=(np.shape(x)[1],1))
eta = 0.1
iterations = 10000
batch = 10

def stochastic_gradient_descent(x,y,w,eta):
    derivative = -(y-np.dot(w.T,x))*x.reshape(np.shape(w))
    return eta*derivative

def batch_gradient_descent(x,y,w,eta):
    derivative = np.sum([-(y[d]-np.dot(w.T.copy(),x[d,:]))*x[d,:].reshape(np.shape(w)) for d in range(len(x))],axis=0)
    return eta*(1/len(x))*derivative

def mini_batch_gradient_descent(x,y,w,eta,batch):
    gradient_sum = np.zeros(shape=np.shape(w))
    for b in range(batch):
        choice = np.random.choice(list(range(len(x))))
        gradient_sum += -(y[choice]-np.dot(w.T,x[choice,:]))*x[choice,:].reshape(np.shape(w))
    return eta*(1/batch)*gradient_sum

# Update w
w_s = []
Error = []
for i in range(iterations):
    # Calculate error
    error = (1/2)*np.sum([(y[i]-np.dot(w.T,x[i,:]))**2 for i in range(len(x))])
    Error.append(error)
    # Stochastic Gradient Descent
    """
    for d in range(len(x)):
        w -= stochastic_gradient_descent(x[d,:],y[d],w,eta)
        w_s.append(w.copy())
    """
    # Minibatch Gradient Descent
    """
    w -= mini_batch_gradient_descent(x,y,w,eta,batch)
    """
    # Batch Gradient Descent
    w -= batch_gradient_descent(x,y,w,eta)

# Show predicted weights
print(w_s)

# Plot the predicted function and the Error
plot_function(x,y,w,Error,w_s)
As a result we receive the fitted function and the error curve (plot omitted). This can surely be improved by altering eta and the number of iterations, by switching to Stochastic or Mini-Batch Gradient Descent (uncomment the corresponding block in the loop above), or by more sophisticated optimization algorithms.
Related
I have to write a program in Python for curve fitting of at least 20 different occupation-probability parameters, as explained below. I have also added the model used for fitting.
Later, once the fit yields the values of the fitted occupation probabilities Pn, we have to determine the mean phonon (vibrational) quantum number from the thermal population distribution over the Pn.
I am attaching code below for just one parameter, P0.
import numpy as np
import pandas as pd
from lmfit import Minimizer, Parameters, report_fit

df = pd.read_csv('Fock0_1st BSB.csv')
x = pd.DataFrame(df["Untitled"]).to_numpy()
data = pd.DataFrame(df["Untitled 1"]).to_numpy()
x = [i[0] for i in x]
data = [i[0] for i in data]
x = np.asarray(x)
data = np.asarray(data)
x = x/1000
data = abs(data-100)/100

n = 0  # Ground State Measurements for n=0

def function(params,x,data):
    v = params.valuesdict()
    # note: the original had v['gamma']((n+1)**0.7), i.e. a missing '*'
    model = 0.5*(1+(v['P0'])*np.cos(np.sqrt(n+1)*v['omega0']*v['eta']*x + v['phase'])*np.exp(-(v['gamma']*((n+1)**0.7))*x)) - v['decay']*x
    return model - data

params = Parameters()
params.add('P0',value=0.97,min=0.01,max=0.999)
params.add('omega0',value=0.1967,min=0.156,max=0.23,vary=True)
params.add('eta',value=0.0629,min=0.01,max=0.11,vary=True)
params.add('gamma',value=5.6E-4)
params.add('phase',value=0.143)
params.add('decay',value=0.1E-6)

minner = Minimizer(function, params, fcn_args=(x, data))
result = minner.minimize()
final = data + result.residual
report_fit(result)

try:
    import matplotlib.pyplot as plt
    plt.plot(x, data, '+')
    plt.plot(x, final)
    plt.show()
except ImportError:
    pass
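To go from a single P0 to the 20 parameters, one possible starting point (a sketch only; it assumes the multi-level signal is a population-weighted sum of the single-n terms, which should be checked against the actual physical model) is to add the Pn in a loop:

import numpy as np
from lmfit import Parameters

N = 20  # number of occupation probabilities to fit

def function(params, x, data):
    v = params.valuesdict()
    total = 0.0
    for n in range(N):
        # same single-n term as above, weighted by its occupation probability
        term = np.cos(np.sqrt(n+1)*v['omega0']*v['eta']*x + v['phase'])
        total += v['P%d' % n]*term*np.exp(-(v['gamma']*((n+1)**0.7))*x)
    model = 0.5*(1 + total) - v['decay']*x
    return model - data

params = Parameters()
for n in range(N):
    params.add('P%d' % n, value=1.0/N, min=0.0, max=1.0)
# omega0, eta, gamma, phase and decay are added exactly as in the P0 script

The mean phonon number would then follow from the fitted probabilities, e.g. as sum(n*Pn).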
I need to develop a neural network able to produce as output the values of a 2D map (for example of a Gaussian distribution) starting from a few input parameters (offset, limit, sigma). In the code below I tried to start, probably in the wrong way, with a simpler case study: the 1D map of a Gaussian distribution.
The outputs are not as expected; I don't know whether I got the data formatting or the instantiation of the neural network wrong. Any suggestion?
from sklearn.neural_network import MLPRegressor
import numpy as np
import matplotlib.pyplot as plt
import math

def gaussian(x, alpha, r):
    return 1./(math.sqrt(alpha**math.pi))*np.exp(-alpha*np.power((x - r), 2.))

features = 20000
output = 1000

w = []
j = []
for iii in range(0,features):
    mu,sigma = 0.,(iii+1)
    x = np.linspace(-(iii+1), (iii+1), output)
    t = gaussian(x, sigma, iii)
    t = t.tolist()
    dummy = np.zeros(3)
    dummy[0] = sigma
    dummy[1] = (iii+1)
    dummy[2] = (iii)
    dummy = dummy.tolist()
    w.append(t)
    j.append(dummy)

nn = MLPRegressor(hidden_layer_sizes=(5000,10), activation='tanh', solver='lbfgs')
model = nn.fit(j,w)
test_i = [[1.0,1.0,0.0]]
test_o = nn.predict(test_i)
I am trying to fit a quadratic function to some data, and I'm trying to do this without using numpy's polyfit function.
Mathematically I tried to follow this website https://neutrium.net/mathematics/least-squares-fitting-of-a-polynomial/ but somehow I don't think I'm doing it right. If anyone could assist me that would be great, or if you could suggest another way to do it, that would also be awesome.
What I've tried so far:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

ones = np.ones(3)
A = np.array( ((0,1),(1,1),(2,1)) )
xfeature = A.T[0]
squaredfeature = A.T[0] ** 2
b = np.array( (1,2,0), ndmin=2 ).T
b = b.reshape(3)

features = np.concatenate((np.vstack(ones), np.vstack(xfeature), np.vstack(squaredfeature)), axis = 1)
featuresc = features.copy()
print(features)
m_det = np.linalg.det(features)
print(m_det)

determinants = []
for i in range(3):
    featuresc.T[i] = b
    print(featuresc)
    det = np.linalg.det(featuresc)
    determinants.append(det)
    print(det)
    featuresc = features.copy()

determinants = determinants / m_det
print(determinants)

plt.scatter(A.T[0],b)
u = np.linspace(0,3,100)
plt.plot(u, u**2*determinants[2] + u*determinants[1] + determinants[0] )
p2 = np.polyfit(A.T[0],b,2)
plt.plot(u, np.polyval(p2,u), 'b--')
plt.show()
As you can see, my curve doesn't compare well to numpy's polyfit curve.
Update:
I went through my code and removed all the stupid mistakes, and now it works when I fit over 3 points, but I have no idea how to fit over more than three points.
This is the new code:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

ones = np.ones(3)
A = np.array( ((0,1),(1,1),(2,1)) )
xfeature = A.T[0]
squaredfeature = A.T[0] ** 2
b = np.array( (1,2,0), ndmin=2 ).T
b = b.reshape(3)

features = np.concatenate((np.vstack(ones), np.vstack(xfeature), np.vstack(squaredfeature)), axis = 1)
featuresc = features.copy()
print(features)
m_det = np.linalg.det(features)
print(m_det)

determinants = []
for i in range(3):
    featuresc.T[i] = b
    print(featuresc)
    det = np.linalg.det(featuresc)
    determinants.append(det)
    print(det)
    featuresc = features.copy()

determinants = determinants / m_det
print(determinants)

plt.scatter(A.T[0],b)
u = np.linspace(0,3,100)
plt.plot(u, u**2*determinants[2] + u*determinants[1] + determinants[0] )
p2 = np.polyfit(A.T[0],b,2)
plt.plot(u, np.polyval(p2,u), 'r--')
plt.show()
Instead of using Cramer's Rule, actually solve the system using least squares. Remember that Cramer's Rule only works if the total number of points you have equals the desired order of polynomial plus 1.
If you don't have this, Cramer's Rule fails, as you're trying to find an exact solution to the problem. If you have more points, the method is unsuitable, because you'd be creating an overdetermined system of equations.
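As a quick illustration (not part of the original answer), the determinant at the heart of Cramer's Rule is simply undefined once the system is overdetermined:

import numpy as np

A = np.vstack([np.ones(4), np.arange(4), np.arange(4)**2]).T  # 4 points, 3 coefficients
try:
    np.linalg.det(A)  # a 4x3 matrix is not square
except np.linalg.LinAlgError as e:
    print(e)  # complains that the array must be square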
To adapt this to more points, numpy.linalg.lstsq is a better fit: it solves Ax = b by computing the vector x that minimizes the Euclidean norm of the residual ||Ax - b||. Therefore, remove the y values from the last column of the features matrix and use numpy.linalg.lstsq to solve for the coefficients:
import numpy as np
import matplotlib.pyplot as plt

ones = np.ones(4)
xfeature = np.asarray([0,1,2,3])
squaredfeature = xfeature ** 2
b = np.asarray([1,2,0,3])

features = np.concatenate((np.vstack(ones), np.vstack(xfeature), np.vstack(squaredfeature)), axis = 1) # Change - remove the y values
determinants = np.linalg.lstsq(features, b, rcond=None)[0] # Change - use least squares; rcond=None opts into the current default

plt.scatter(xfeature,b)
u = np.linspace(0,3,100)
plt.plot(u, u**2*determinants[2] + u*determinants[1] + determinants[0] )
plt.show()
I get this plot now, which matches the dashed curve in your graph and also matches what numpy.polyfit gives you.
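Continuing from the snippet above, you can also confirm the agreement numerically (a small check I've added; numpy.polyfit returns coefficients highest order first, so they need reversing):

p2 = np.polyfit(xfeature, b, 2)
print(p2[::-1])       # constant, linear, quadratic
print(determinants)   # should agree to numerical precision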
I'm trying to plot the decision boundary for a non-linear logistic regression, like the following image. The data can be found here: ex2data2.txt.
I'm not sure what call to plt.contour() I should be using to reproduce this. The related Matlab function call would be:
contour(u, v, z, [0, 0], 'LineWidth', 2)
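For reference, the closest matplotlib equivalent of that Matlab call I can think of (my assumption, not something confirmed in the original post) would be:

plt.contour(uu, vv, z, levels=[0], linewidths=2)

where levels=[0] draws only the zero-level contour, i.e. the decision boundary.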
import scikitplot.plotters as skplt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_classification
from sklearn import metrics
from ggplot import *
import time

def mapFeature(X1, X2, df=True):
    """
    X1, X2: dtype = pd.DataFrame, float, int
            either a single value or a vector of values
    df : dtype = boolean
         whether it's a single scalar value or a vector of values
    ----------
    Return: dtype = m row vector or m x n vector of feature values

    Calculates each feature and returns its value
    """
    # add a column of ones for the intercept parameter
    out = pd.DataFrame({'1':np.ones(X1.size)})
    # all the combinations of polynomials up to the 6th degree
    for i in range(1,7):
        for j in range(i+1):
            value = (X1**(i-j))*(X2**j)
            col_name = 'X1**{} * X2**{}'.format(i-j, j)
            # for a single scalar pair, build a one-row DataFrame
            if df:
                out = out.join(pd.DataFrame({col_name: value}))
            else:
                out = out.join(pd.DataFrame({col_name: value}, index=[0]))
    return out

if __name__ == '__main__':
    data = pd.read_csv('ex2data2.txt', header=None,
                       names=['Test1', 'Test2', 'Pass'])
    X = data.iloc[:, :2]
    y = data.iloc[:,2]
    X = mapFeature(X.iloc[:,0], X.iloc[:,1])
    clf = LogisticRegression().fit(X, y)
    theta = clf.coef_

    start, end = -1, 1.5  # assumed plot range; not defined in the original snippet
    u = np.linspace(start, end, 30)
    v = np.linspace(start, end, 30)
    uu, vv = np.meshgrid(u, v)
    z = np.zeros((30, 30))
    for i in range(30):
        for j in range(30):
            # the original used v[i] here, which looks like a typo for v[j]
            z[i,j] = mapFeature(u[i], v[j], df=False).values.dot(theta.T)
    # transpose z so rows correspond to v and columns to u, as contour expects
    plt.contour(uu, vv, z.T, [0])
    plt.show()
I found this very helpful blog post for the implementation of self-organizing maps using TensorFlow. I tried running the scikit-learn iris data set on it and got the result shown in the image below. To see how the SOM evolves, I would like to animate my graph, and this is where I got stuck. I found a basic example for animation:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation

fig2 = plt.figure()

x = np.arange(-9, 10)
y = np.arange(-9, 10).reshape(-1, 1)
base = np.hypot(x, y)
ims = []
for add in np.arange(15):
    ims.append((plt.pcolor(x, y, base + add, norm=plt.Normalize(0, 30)),))

im_ani = animation.ArtistAnimation(fig2, ims, interval=50, repeat_delay=3000, blit=True)
plt.show()
To animate, I must edit the train function of som.py, because the training loop is encapsulated there. It looks like this:
def train(self, input_vects):
    """
    Trains the SOM.
    'input_vects' should be an iterable of 1-D NumPy arrays with
    dimensionality as provided during initialization of this SOM.
    Current weightage vectors for all neurons(initially random) are
    taken as starting conditions for training.
    """
    #fig2 = plt.figure()

    #Training iterations
    for iter_no in tqdm(range(self._n_iterations)):
        #Train with each vector one by one
        for input_vect in input_vects:
            self._sess.run(self._training_op,
                           feed_dict={self._vect_input: input_vect,
                                      self._iter_input: iter_no})

    #Store a centroid grid for easy retrieval later on
    centroid_grid = [[] for i in range(self._m)]
    self._weightages = list(self._sess.run(self._weightage_vects))
    self._locations = list(self._sess.run(self._location_vects))
    for i, loc in enumerate(self._locations):
        centroid_grid[loc[0]].append(self._weightages[i])
    #im_ani = animation.ArtistAnimation(fig2, centroid_grid, interval=50, repeat_delay=3000, blit=True)
    self._centroid_grid = centroid_grid

    self._trained = True
    #plt.show()
The comments show my attempt at implementing the animation, but it doesn't work: in the basic example, the ims list holds matplotlib artists, whereas in the training function the list is a nested list of NumPy arrays (effectively 4-D), which ArtistAnimation can't consume directly.
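What I think I need is roughly the following (a sketch of the intent only; som._snapshots is a hypothetical attribute that train() would have to fill with one copy of centroid_grid per iteration):

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation

fig = plt.figure()
# plt.imshow returns an artist, which is what ArtistAnimation expects;
# this assumes the weight values lie in [0, 1] so they render as RGBA
ims = [(plt.imshow(np.array(grid)),) for grid in som._snapshots]
im_ani = animation.ArtistAnimation(fig, ims, interval=200, repeat_delay=3000, blit=True)
plt.show()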
To sum it up: how can I animate my plot? Thanks for your help in advance.
Here is my full code:
som.py
import tensorflow as tf
import numpy as np
from tqdm import tqdm
import matplotlib.animation as animation
from matplotlib import pyplot as plt
import time

class SOM(object):
    """
    2-D Self-Organizing Map with Gaussian Neighbourhood function
    and linearly decreasing learning rate.
    """

    #To check if the SOM has been trained
    _trained = False

    def __init__(self, m, n, dim, n_iterations=100, alpha=None, sigma=None):
        """
        Initializes all necessary components of the TensorFlow
        Graph.

        m X n are the dimensions of the SOM. 'n_iterations' should
        be an integer denoting the number of iterations undergone
        while training.
        'dim' is the dimensionality of the training inputs.
        'alpha' is a number denoting the initial time(iteration no)-based
        learning rate. Default value is 0.3
        'sigma' is the initial neighbourhood value, denoting
        the radius of influence of the BMU while training. By default, it's
        taken to be half of max(m, n).
        """

        #Assign required variables first
        self._m = m
        self._n = n
        if alpha is None:
            alpha = 0.3
        else:
            alpha = float(alpha)
        if sigma is None:
            sigma = max(m, n) / 2.0
        else:
            sigma = float(sigma)
        self._n_iterations = abs(int(n_iterations))

        ##INITIALIZE GRAPH
        self._graph = tf.Graph()

        ##POPULATE GRAPH WITH NECESSARY COMPONENTS
        with self._graph.as_default():

            ##VARIABLES AND CONSTANT OPS FOR DATA STORAGE

            #Randomly initialized weightage vectors for all neurons,
            #stored together as a matrix Variable of size [m*n, dim]
            self._weightage_vects = tf.Variable(tf.random_normal(
                [m*n, dim]))

            #Matrix of size [m*n, 2] for SOM grid locations
            #of neurons
            self._location_vects = tf.constant(np.array(
                list(self._neuron_locations(m, n))))

            ##PLACEHOLDERS FOR TRAINING INPUTS
            #We need to assign them as attributes to self, since they
            #will be fed in during training

            #The training vector
            self._vect_input = tf.placeholder("float", [dim])
            #Iteration number
            self._iter_input = tf.placeholder("float")

            ##CONSTRUCT TRAINING OP PIECE BY PIECE
            #Only the final, 'root' training op needs to be assigned as
            #an attribute to self, since all the rest will be executed
            #automatically during training

            #To compute the Best Matching Unit given a vector
            #Basically calculates the Euclidean distance between every
            #neuron's weightage vector and the input, and returns the
            #index of the neuron which gives the least value
            bmu_index = tf.argmin(tf.sqrt(tf.reduce_sum(
                tf.pow(tf.subtract(self._weightage_vects,
                                   tf.stack([self._vect_input for i in range(m*n)])), 2), 1)), 0)

            #This will extract the location of the BMU based on the BMU's
            #index
            slice_input = tf.pad(tf.reshape(bmu_index, [1]),
                                 np.array([[0, 1]]))
            bmu_loc = tf.reshape(tf.slice(self._location_vects, slice_input,
                                          tf.constant(np.array([1, 2]))),
                                 [2])

            #To compute the alpha and sigma values based on iteration
            #number
            learning_rate_op = tf.subtract(1.0, tf.div(self._iter_input,
                                                       self._n_iterations))
            _alpha_op = tf.multiply(alpha, learning_rate_op)
            _sigma_op = tf.multiply(sigma, learning_rate_op)

            #Construct the op that will generate a vector with learning
            #rates for all neurons, based on iteration number and location
            #wrt BMU.
            bmu_distance_squares = tf.reduce_sum(tf.pow(tf.subtract(
                self._location_vects, tf.stack(
                    [bmu_loc for i in range(m*n)])), 2), 1)
            neighbourhood_func = tf.exp(tf.negative(tf.div(tf.cast(
                bmu_distance_squares, "float32"), tf.pow(_sigma_op, 2))))
            learning_rate_op = tf.multiply(_alpha_op, neighbourhood_func)

            #Finally, the op that will use learning_rate_op to update
            #the weightage vectors of all neurons based on a particular
            #input
            learning_rate_multiplier = tf.stack([tf.tile(tf.slice(
                learning_rate_op, np.array([i]), np.array([1])), [dim])
                for i in range(m*n)])
            weightage_delta = tf.multiply(
                learning_rate_multiplier,
                tf.subtract(tf.stack([self._vect_input for i in range(m*n)]),
                            self._weightage_vects))
            new_weightages_op = tf.add(self._weightage_vects,
                                       weightage_delta)
            self._training_op = tf.assign(self._weightage_vects,
                                          new_weightages_op)

            ##INITIALIZE SESSION
            self._sess = tf.Session()

            ##INITIALIZE VARIABLES
            init_op = tf.global_variables_initializer()
            self._sess.run(init_op)

    def _neuron_locations(self, m, n):
        """
        Yields one by one the 2-D locations of the individual neurons
        in the SOM.
        """
        #Nested iterations over both dimensions
        #to generate all 2-D locations in the map
        for i in range(m):
            for j in range(n):
                yield np.array([i, j])

    def train(self, input_vects):
        """
        Trains the SOM.
        'input_vects' should be an iterable of 1-D NumPy arrays with
        dimensionality as provided during initialization of this SOM.
        Current weightage vectors for all neurons(initially random) are
        taken as starting conditions for training.
        """
        #fig2 = plt.figure()

        #Training iterations
        for iter_no in tqdm(range(self._n_iterations)):
            #Train with each vector one by one
            for input_vect in input_vects:
                self._sess.run(self._training_op,
                               feed_dict={self._vect_input: input_vect,
                                          self._iter_input: iter_no})

        #Store a centroid grid for easy retrieval later on
        centroid_grid = [[] for i in range(self._m)]
        self._weightages = list(self._sess.run(self._weightage_vects))
        self._locations = list(self._sess.run(self._location_vects))
        for i, loc in enumerate(self._locations):
            centroid_grid[loc[0]].append(self._weightages[i])
        #im_ani = animation.ArtistAnimation(fig2, centroid_grid, interval=50, repeat_delay=3000, blit=True)
        self._centroid_grid = centroid_grid
        #print(centroid_grid)

        self._trained = True
        #plt.show()

    def get_centroids(self):
        """
        Returns a list of 'm' lists, with each inner list containing
        the 'n' corresponding centroid locations as 1-D NumPy arrays.
        """
        if not self._trained:
            raise ValueError("SOM not trained yet")
        return self._centroid_grid

    def map_vects(self, input_vects):
        """
        Maps each input vector to the relevant neuron in the SOM
        grid.
        'input_vects' should be an iterable of 1-D NumPy arrays with
        dimensionality as provided during initialization of this SOM.
        Returns a list of 1-D NumPy arrays containing (row, column)
        info for each input vector(in the same order), corresponding
        to mapped neuron.
        """
        if not self._trained:
            raise ValueError("SOM not trained yet")
        to_return = [self._locations[min([i for i in range(len(self._weightages))],
                                         key=lambda x: np.linalg.norm(vect-self._weightages[x]))]
                     for vect in input_vects]
        return to_return
usage.py
from matplotlib import pyplot as plt
import matplotlib.animation as animation
import numpy as np
from som import SOM
from sklearn.datasets import load_iris

data = load_iris()
flower_data = data['data']
normed_flower_data = flower_data / flower_data.max(axis=0)
target_int = data['target']
target_names = data['target_names']
targets = [target_names[i] for i in target_int]

#Train a 25x25 SOM with 100 iterations
som = SOM(25, 25, 4, 100) # My parameters
som.train(normed_flower_data)

#Get output grid
image_grid = som.get_centroids()

#Map colours to their closest neurons
mapped = som.map_vects(normed_flower_data)

#Plot
plt.imshow(image_grid)
plt.title('SOM')
for i, m in enumerate(mapped):
    plt.text(m[1], m[0], targets[i], ha='center', va='center',
             bbox=dict(facecolor='white', alpha=0.5, lw=0))
plt.show()