Example for NARX in python - both training and prediction - python

Is there any end-to-end example of how to train and predict/inference data using a NARX model in python?
There is the library PyNeurgen NARX PyNeurgen library
but the documentation for PyNeurgen isn't very complete.
This OP seems to have written a Keras implementation but, but the code is lacking an implementation for inferencing/prediction. NARX implementation using keras

I think you are looking for the equivalents of model.fit() & model.predict(), which are net.learn() and net.test() in pyneurgen. Based on this tutorial and this example, I formulated this demo (in python 2.7) to give you an idea on how it is done. I hope it helps.
import math
import numpy as np
import matplotlib.pylab as plt
from pyneurgen.neuralnet import NeuralNet
from pyneurgen.recurrent import NARXRecurrent
def plot_results(x, y, y_true):
plt.subplot(3, 1, 1)
plt.plot([i[1] for i in population])
plt.title("Population")
plt.grid(True)
plt.subplot(3, 1, 2)
plt.plot(x, y, 'bo', label='targets')
plt.plot(x, y_true, 'ro', label='actuals')
plt.grid(True)
plt.legend(loc='lower left', numpoints=1)
plt.title("Test Target Points vs Actual Points")
plt.subplot(3, 1, 3)
plt.plot(range(1, len(net.accum_mse) + 1, 1), net.accum_mse)
plt.xlabel('epochs')
plt.ylabel('mean squared error')
plt.grid(True)
plt.title("Mean Squared Error by Epoch")
plt.show()
def generate_data():
# all samples are drawn from this population
pop_len = 200
factor = 1.0 / float(pop_len)
population = [[i, math.sin(float(i) * factor * 10.0)] for i in range(pop_len)]
population_shuffle = population[:]
all_inputs = []
all_targets = []
np.random.shuffle(population_shuffle)
for position, target in population_shuffle:
all_inputs.append([position * factor])
all_targets.append([target])
# print(all_inputs[-1], all_targets[-1])
return population, all_inputs, all_targets
# generate data
population, all_inputs, all_targets = generate_data()
# NARXRecurrent
input_nodes, hidden_nodes, output_nodes = 1, 10, 1
output_order, incoming_weight_from_output = 3, .6
input_order, incoming_weight_from_input = 2, .4
# init neural network
net = NeuralNet()
net.init_layers(input_nodes, [hidden_nodes], output_nodes,
NARXRecurrent(output_order, incoming_weight_from_output,
input_order, incoming_weight_from_input))
net.randomize_network()
net.set_halt_on_extremes(True)
# set constrains and rates
net.set_random_constraint(.5)
net.set_learnrate(.1)
# set inputs and outputs
net.set_all_inputs(all_inputs)
net.set_all_targets(all_targets)
# set lengths
length = len(all_inputs)
learn_end_point = int(length * .8)
# set ranges
net.set_learn_range(0, learn_end_point)
net.set_test_range(learn_end_point + 1, length - 1)
# add activation to layer 1
net.layers[1].set_activation_type('tanh')
# fit data to model
net.learn(epochs=150, show_epoch_results=True, random_testing=False)
# define mean squared error
mse = net.test()
print "Testing mse = ", mse
# define data
x = [item[0][0] * 200.0 for item in net.get_test_data()]
y = [item[0][0] for item in net.test_targets_activations]
y_true = [item[1][0] for item in net.test_targets_activations]
# plot results
plot_results(x, y, y_true)
This results in the following plot:
For more examples, take a look at the following links:
Air_Quality_ppm_prediction/NARXImproved.ipynb
NARX-Prediction-Model/NARX_Neural_Network.ipynb
CS230project/NARX_PyNeurGen.ipynb
TimeSeriesNotes/narx.py

Related

The result of least_squares is different depending on the environment

The acquisition channel of scipy and the same version are used.
The result of least_squares is different depending on the environment.
Differences in the environment, the PC is different.
version:1.9.1 py39h316f440_0
channel:conda-forge
environment:windows
I've attached the source code I ran.
If the conditions are the same except for the environment, I would like to get the same results.
Why different causes? How can I do that?
thank you.
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import odeint
from scipy.optimize import least_squares
import random
random.seed(134)
import numpy as np
np.random.seed(134)
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import odeint
from scipy.optimize import least_squares
def report_params(fit_params_values, fit_param_names):
for each in range(len(fit_param_names)):
print(fit_param_names[each], 'is', fit_params_values[each])
# define your modules
def pCon1():
# This is the module for a specific insubstatiation of a constituitive promoter
# the input is nothing
# the output is a protein production amount per time unit
pCon1_production_rate = 100
return pCon1_production_rate
def pLux1(LuxR, AHL):
# This is the module for a specific insubstatiation of a lux promoter
# the input is a LuxR amount and an AHL amount
# the output is a protein production amount per time unit
# For every promoter there is some function that determines what the promoter's
# maximal and basal expression are based on the amount of transcriptional factor
# is floating around in the cell. These numbers are empircally determined, and
# for demonstration purposes are fictionally and arbitrarily filled in here.
# These functions take the form of hill functions.
basal_n = 2
basal_basal = 2
basal_max = 2
basal_kd = 2
basal_expression_rate = basal_basal + (basal_max * (LuxR**basal_n / (LuxR**basal_n + basal_kd)))
max_n = 2
max_max = 2
max_kd = 2
maximal_expression_rate = (LuxR**max_n / (LuxR**max_n + max_kd))
pLux1_n = 2
pLux1_kd = 10
pLux1_production_rate = basal_expression_rate + maximal_expression_rate*(AHL**pLux1_n / (pLux1_kd + AHL**pLux1_n))
return pLux1_production_rate
def simulation_set_of_equations(y, t, *args):
# Args are strictly for parameters we want to eventually estimate.
# Everything else must be hardcoded below. Sorry for the convience.
# Unpack your parameters
k_pCon_express = args[0] # A summation of transcription and translation from a pCon promoter
k_pLux_express = args[1] # A summation of transcription and translation from a pLux promoter
k_loss = args[2] # A summation of dilution and degredation
# Unpack your current amount of each species
LuxR, GFP, AHL = y
# Determine the change in each species
dLuxR = pCon1() - k_loss*LuxR
dGFP = pLux1(LuxR, AHL)*k_pLux_express - k_loss*GFP
dAHL = 0 # for now we're assuming AHL was added exogenously and never degrades
# Return the change in each species; make sure same order as your init values
# scipy.odeint will take these values and apply them to the current value of each species in the next time step for you
return [dLuxR, dGFP, dAHL]
# Parameters
k_pCon_express = 101
k_pLux_express = 50
k_loss = 0.1
params = (k_pCon_express, k_pLux_express, k_loss)
param_names = ['k_pCon_express', 'k_pLux_express', 'k_loss'] # somehow this is honestly necessary in Python?!
# Initial Conditions
# LuxR, GFP, AHL
init_P = [1000, 0, 11]
# Timesteps
n_steps = 500
t = np.linspace(0, 30, n_steps)
num_P = odeint(simulation_set_of_equations, init_P, t, args = (params))
plt.plot(t, num_P[:,0], c='b', label = 'LuxR')
plt.plot(t, num_P[:,1], c='g', label = 'GFP')
plt.plot(t, num_P[:,2], c='r', label = 'AHL')
plt.xlabel('Time')
plt.ylabel('Concentration')
plt.legend(loc = 'best')
plt.grid()
plt.yscale('log')
plt.show()
noise = np.random.normal(0, 10, num_P.shape)
exp_P = num_P + noise
exp_t = t[::10]
exp_P = exp_P[::10]
# Create experimental data. Just take the regular simulation data and add some gaussian noise to it.
def residuals(params):
params = tuple(params)
sim_P = odeint(simulation_set_of_equations, init_P, exp_t, args = params)
res = sim_P - exp_P
return res.flatten()
initial_guess = (100, 100, 100)
low_bounds = [0, 0, 0]
up_bounds = [1000, 1000, 1000]
fitted_params = least_squares(residuals, initial_guess, bounds=(low_bounds, up_bounds)).x
# small reminder: .x is the fitted parameters attribute of the least_squares output
# With least_squares function, unlike, say, curve_fit, it does not compute the covariance matrix for you
# TODO calculate standard deviation of parameter estimation
# (will this ever be used other than sanity checking?)
print(params)
report_params(fitted_params, param_names)
(101, 50, 0.1)
k_pCon_express is 100.0
k_pLux_express is 49.9942246627
k_loss is 0.100037839987
plt.plot(t, odeint(simulation_set_of_equations, init_P, t, args = tuple(params))[:,1], c='r', label='GFP - Given Param Simulation')
plt.scatter(exp_t, exp_P[:,1], c='b', label='GFP - Fake Experimental Data')
plt.plot(t, odeint(simulation_set_of_equations, init_P, t, args = tuple(fitted_params))[:,1], c='g', label='GFP - Fitted Param Simlulation')
plt.legend(loc = 'best')
plt.xlabel('Time')
plt.ylabel('Concentration')
plt.grid()
plt.yscale('log')
plt.show()

Python optimization of prediction of random forest regressor

I have built a random forest regressor to predict the elasticity of a certain object based on color, material, size and other features.
The model works fine and I can predict the expected elasticity given certain inputs.
Eventually, I want to be able to find the lowest elasticity with certain constraints. The inputs have limited possibilities, i.e., material can only be plastic or textile.
I would like to have a smart solution in which I don't have to brute force and try all the possible combinations and find the one with lowest elasticity. I have found that surrogate models can be used for this but I don't understand how to apply this concept to my problem. For example, what is the objective function I should optimize in my case? I thought of passing the .predict() of the random forest but I'm not sure this is the correct way.
To summarize, I'd like to have a solution that given certain conditions, tells me what should be the best set of features to have lowest elasticity. Example, I'm looking for the lowest elasticity when the object is made of plastic --> I'd like to receive the set of other features that tells me how to get lowest elasticity in that case. Or simply, what feature I should tune to improve the performance
import numpy
from scipy.optimize import minimize
import random
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor(n_estimators=10, random_state=0)
model.fit(X_train,y_train)
material = [0,1]
size= list(range(1, 45))
color= list(range(1, 500))
def objective(x):
material= x[0]
size = x[1]
color = x[2]
return model.predict([[material,size,color]])
# initial guesses
n = 3
x0 = np.zeros(n)
x0[0] = random.choice(material)
x0[1] = random.choice(size)
x0[2] = random.choice(color)
# optimize
b = (None,None)
bnds = (b, b, b, b, b)
solution = minimize(objective, x0, method='nelder-mead',
options={'xtol': 1e-8, 'disp': True})
x = solution.x
print('Final Objective: ' + str(objective(x)))
This is one solution if I understood you correctly,
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from scipy.optimize import differential_evolution
model = None
def objective(x):
material= x[0]
size = x[1]
color = x[2]
return model.predict([[material,size,color]])
# define input data
material = np.random.choice([0,1], 10); material = np.expand_dims(material, 1)
size = np.arange(10); size = np.expand_dims(size, 1)
color = np.arange(20, 30); color = np.expand_dims(color, 1)
input = np.concatenate((material, size, color), 1) # shape = (10, 3)
# define output = elasticity between [0, 1] i.e. 0-100%
elasticity = np.array([0.51135295, 0.54830051, 0.42198349, 0.72614775, 0.55087905,
0.99819945, 0.3175208 , 0.78232872, 0.11621277, 0.32219236])
# model and minimize
model = RandomForestRegressor(n_estimators=100, random_state=0)
model.fit(input, elasticity)
limits = ((0, 1), (0, 10), (20, 30))
res = differential_evolution(objective, limits, maxiter = 10000, seed = 11111)
min_y = model.predict([res.x])[0]
print("min_elasticity ==", round(min_y, 5))
The output is minimal elasticity based on the limits
min_elasticity == 0.19029
These are random data so the RandomForestRegressor doesn't do the best job perhaps

How to do Constrained Linear Regression - scikit learn?

I am trying to carry out linear regression subject using some constraints to get a certain prediction.
I want to make the model predicting half of the linear prediction, and the last half linear prediction near the last value in the first half using a very narrow range (using constraints) similar to a green line in figure.
The full code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
pd.options.mode.chained_assignment = None # default='warn'
data = [5.269, 5.346, 5.375, 5.482, 5.519, 5.57, 5.593999999999999, 5.627000000000001, 5.724, 5.818, 5.792999999999999, 5.817, 5.8389999999999995, 5.882000000000001, 5.92, 6.025, 6.064, 6.111000000000001, 6.1160000000000005, 6.138, 6.247000000000001, 6.279, 6.332000000000001, 6.3389999999999995, 6.3420000000000005, 6.412999999999999, 6.442, 6.519, 6.596, 6.603, 6.627999999999999, 6.76, 6.837000000000001, 6.781000000000001, 6.8260000000000005, 6.849, 6.875, 6.982, 7.018, 7.042000000000001, 7.068, 7.091, 7.204, 7.228, 7.261, 7.3420000000000005, 7.414, 7.44, 7.516, 7.542000000000001, 7.627000000000001, 7.667000000000001, 7.821000000000001, 7.792999999999999, 7.756, 7.871, 8.006, 8.078, 7.916, 7.974, 8.074, 8.119, 8.228, 7.976, 8.045, 8.312999999999999, 8.335, 8.388, 8.437999999999999, 8.456, 8.227, 8.266, 8.277999999999999, 8.289, 8.299, 8.318, 8.332, 8.34, 8.349, 8.36, 8.363999999999999, 8.368, 8.282, 8.283999999999999]
time = range(1,85,1)
x=int(0.7*len(data))
df = pd.DataFrame(list(zip(*[time, data])))
df.columns = ['time', 'data']
# print df
x=int(0.7*len(df))
train = df[:x]
valid = df[x:]
models = []
names = []
tr_x_ax = []
va_x_ax = []
pr_x_ax = []
tr_y_ax = []
va_y_ax = []
pr_y_ax = []
time_model = []
models.append(('LR', LinearRegression()))
for name, model in models:
x_train=df.iloc[:, 0][:x].values
y_train=df.iloc[:, 1][:x].values
x_valid=df.iloc[:, 0][x:].values
y_valid=df.iloc[:, 1][x:].values
model = LinearRegression()
# poly = PolynomialFeatures(5)
x_train= x_train.reshape(-1, 1)
y_train= y_train.reshape(-1, 1)
x_valid = x_valid.reshape(-1, 1)
y_valid = y_valid.reshape(-1, 1)
# model.fit(x_train,y_train)
model.fit(x_train,y_train.ravel())
# score = model.score(x_train,y_train.ravel())
# print 'score', score
preds = model.predict(x_valid)
tr_x_ax.extend(train['data'])
va_x_ax.extend(valid['data'])
pr_x_ax.extend(preds)
valid['Predictions'] = preds
valid.index = df[x:].index
train.index = df[:x].index
plt.figure(figsize=(5,5))
# plt.plot(train['data'],label='data')
# plt.plot(valid[['Close', 'Predictions']])
x = valid['data']
# print x
# plt.plot(valid['data'],label='validation')
plt.plot(valid['Predictions'],label='Predictions before',color='orange')
y =range(0,58)
y1 =range(58,84)
for index, item in enumerate(pr_x_ax):
if index >13:
pr_x_ax[index] = pr_x_ax[13]
pr_x_ax = list([float(i) for i in pr_x_ax])
va_x_ax = list([float(i) for i in va_x_ax])
tr_x_ax = list([float(i) for i in tr_x_ax])
plt.plot(y,tr_x_ax, label='train' , color='red', linewidth=2)
plt.plot(y1,va_x_ax, label='validation1' , color='blue', linewidth=2)
plt.plot(y1,pr_x_ax, label='Predictions after' , color='green', linewidth=2)
plt.xlabel("time")
plt.ylabel("data")
plt.xticks(rotation=45)
plt.legend()
plt.show()
If you see this figure:
label: Predictions before, the model predicted it without any constraints (I don't need this result).
label: Predictions after, the model predicted it within a constraint but this is after the model predicted AND the all values are equal to last value at index = 71 , item 8.56.
I used for loop for index, item in enumerate(pr_x_ax): in line:64, and the curve is line straight from time 71 to 85 sec as you see in order to show you how I need the model work.
Could I build the model give the same result instead of for loop???
Please your suggestions
I expect that in your question by drawing green line you really expect trained model to predict linear horizontal turn to the right. But current trained model draws just straight orange line.
It is true for any trained model of any algorithm and type that in order to learn some unordinary change in behavior model needs to have at least some samples of that unordinary change. Or at least some hidden meaning in observed data should point to having such unordinary change.
In other words for your model to learn that right turn on green line a model should have points with that right turn in the training data set. But you take for training data just first (leftmost) 70% of data by train = df[:int(0.7 * len(df))] and that training data has no such right turns and this training data just looks close to one straight line.
So you need to re-sample your data into training and validation in a different way - take randomly 70% of samples from whole range of X and the rest goes to validation. So that in your training data samples that do right turn also included.
Second thing is that LinearRegression model always models predictions just with one single straight line, and this line can't have right turns. In order to have right turns you need some more complex model.
One way for a model to have a right turn is to be piece-wise-linear, i.e. having several joined straight lines. I didn't find ready-made piecewise linear models inside sklearn, only using other pip models. So I decided to implement my own simple class PieceWiseLinearRegression that uses np.piecewise() and scipy.optimize.curve_fit() in order to model piecewise linear function.
Next picture shows results of applying two mentioned things above, code goes afterwards, re-sampling dataset in a different way and modeling piece-wise-linear function. Your current linear model LR still makes a prediction using just one straight blue line, while my piecewise linear PWLR2, orange line, consists of two segments and correctly predicts right turn:
To see clearly just one PWLR2 graph I did next picture too:
My class PieceWiseLinearRegression on creation of object accepts just one argument n - number of linear segments to be used for prediction. For picture above n = 2 was used.
import sys, numpy as np, pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
np.random.seed(0)
class PieceWiseLinearRegression:
#classmethod
def nargs_func(cls, f, n):
return eval('lambda ' + ', '.join([f'a{i}'for i in range(n)]) + ': f(' + ', '.join([f'a{i}'for i in range(n)]) + ')', locals())
#classmethod
def piecewise_linear(cls, n):
condlist = lambda xs, xa: [(lambda x: (
(xs[i] <= x if i > 0 else np.full_like(x, True, dtype = np.bool_)) &
(x < xs[i + 1] if i < n - 1 else np.full_like(x, True, dtype = np.bool_))
))(xa) for i in range(n)]
funclist = lambda xs, ys: [(lambda i: (
lambda x: (
(x - xs[i]) * (ys[i + 1] - ys[i]) / (
(xs[i + 1] - xs[i]) if abs(xs[i + 1] - xs[i]) > 10 ** -7 else 10 ** -7 * (-1, 1)[xs[i + 1] - xs[i] >= 0]
) + ys[i]
)
))(j) for j in range(n)]
def f(x, *pargs):
assert len(pargs) == (n + 1) * 2, (n, pargs)
xs, ys = pargs[0::2], pargs[1::2]
xa = x.ravel().astype(np.float64)
ya = np.piecewise(x = xa, condlist = condlist(xs, xa), funclist = funclist(xs, ys)).ravel()
#print('xs', xs, 'ys', ys, 'xa', xa, 'ya', ya)
return ya
return cls.nargs_func(f, 1 + (n + 1) * 2)
def __init__(self, n):
self.n = n
self.f = self.piecewise_linear(self.n)
def fit(self, x, y):
from scipy import optimize
self.p, self.e = optimize.curve_fit(self.f, x, y, p0 = [j for i in range(self.n + 1) for j in (np.amin(x) + i * (np.amax(x) - np.amin(x)) / self.n, 1)])
#print('p', self.p)
def predict(self, x):
return self.f(x, *self.p)
data = [5.269, 5.346, 5.375, 5.482, 5.519, 5.57, 5.593999999999999, 5.627000000000001, 5.724, 5.818, 5.792999999999999, 5.817, 5.8389999999999995, 5.882000000000001, 5.92, 6.025, 6.064, 6.111000000000001, 6.1160000000000005, 6.138, 6.247000000000001, 6.279, 6.332000000000001, 6.3389999999999995, 6.3420000000000005, 6.412999999999999, 6.442, 6.519, 6.596, 6.603, 6.627999999999999, 6.76, 6.837000000000001, 6.781000000000001, 6.8260000000000005, 6.849, 6.875, 6.982, 7.018, 7.042000000000001, 7.068, 7.091, 7.204, 7.228, 7.261, 7.3420000000000005, 7.414, 7.44, 7.516, 7.542000000000001, 7.627000000000001, 7.667000000000001, 7.821000000000001, 7.792999999999999, 7.756, 7.871, 8.006, 8.078, 7.916, 7.974, 8.074, 8.119, 8.228, 7.976, 8.045, 8.312999999999999, 8.335, 8.388, 8.437999999999999, 8.456, 8.227, 8.266, 8.277999999999999, 8.289, 8.299, 8.318, 8.332, 8.34, 8.349, 8.36, 8.363999999999999, 8.368, 8.282, 8.283999999999999]
time = list(range(1, 85))
df = pd.DataFrame(list(zip(time, data)), columns = ['time', 'data'])
choose_train = np.random.uniform(size = (len(df),)) < 0.8
choose_valid = ~choose_train
x_all = df.iloc[:, 0].values
y_all = df.iloc[:, 1].values
x_train = df.iloc[:, 0][choose_train].values
y_train = df.iloc[:, 1][choose_train].values
x_valid = df.iloc[:, 0][choose_valid].values
y_valid = df.iloc[:, 1][choose_valid].values
x_all_lin = np.linspace(np.amin(x_all), np.amax(x_all), 500)
models = []
models.append(('LR', LinearRegression()))
models.append(('PWLR2', PieceWiseLinearRegression(2)))
for imodel, (name, model) in enumerate(models):
model.fit(x_train[:, None], y_train)
x_all_lin_pred = model.predict(x_all_lin[:, None])
plt.plot(x_all_lin, x_all_lin_pred, label = f'pred {name}')
plt.plot(x_train, y_train, label='train')
plt.plot(x_valid, y_valid, label='valid')
plt.xlabel('time')
plt.ylabel('data')
plt.legend()
plt.show()

Python package for Bayesian optimization among a given set of samples

I have a set of samples and their corresponding target values. Below is the visualization:
I am trying to use Bayesian optimization to find the sample with maximum target value. My question is if someone knows a python package that does this. I was looking at BayesianOptimization package and it seems that it can perform optimization over an interval, but not necessarily only among the samples.
Edit: Here is a sample code:
import matplotlib.pyplot as plt
import numpy as np
from bayes_opt import BayesianOptimization
mean = (1, 2)
cov = [[1, 0], [0, 1]]
my_x, my_y = np.random.multivariate_normal(mean, cov, 1000).T
cmap_data = -my_x ** 2 - (my_y - 1) ** 2 + 1
search = []
def black_box_function(x, y):
a = np.hstack((x, y))
search.append(a)
plt.scatter(my_x, my_y, marker='.', s=1, c=np.array(cmap_data))
plt.axis('equal')
# plt.scatter(np.array(search)[:, 0], np.array(search)[:, 1], marker='.', s=20, c='black')
plt.plot(np.array(search)[:, 0], np.array(search)[:, 1], marker='.', c='black', linewidth=0.5)
plt.show()
return -x ** 2 - (y - 1) ** 2 + 1
# -- optimization
pbounds = {'x': (min(my_x), max(my_x)), 'y': (min(my_y), max(my_y))}
optimizer = BayesianOptimization(
f=black_box_function,
pbounds=pbounds,
random_state=1,
)
for i in range(1000):
optimizer.probe(
params=[my_x[i], my_y[i]],
lazy=True,
)
optimizer.maximize(
init_points=0,
n_iter=0,
)
But it seems optimizer.probe just evaluates the values (to be used in future iterations), and it is not an actual iteration of optimization.

Bad K-means with Gradient Descent using TensorFlow

Currently learning TensorFlow I'm working to implement kmeans clustering using TensorFlow. I am following a tutorial on TensorFlow which first introduce kmeans then introduce Gradient Descent Optimization
We first generate samples
def create_samples(n_clusters, n_samples_per_cluster, n_features, embiggen_factor, seed):
np.random.seed(seed)
slices = []
centroids = []
# Create samples for each cluster
for i in range(n_clusters):
samples = tf.random_normal((n_samples_per_cluster, n_features),
mean=0.0, stddev=5.0, dtype=tf.float32, seed=seed, name="cluster_{}".format(i))
current_centroid = (np.random.random((1, n_features)) * embiggen_factor) - (embiggen_factor/2)
centroids.append(current_centroid)
samples += current_centroid
slices.append(samples)
# Create a big "samples" dataset
samples = tf.concat(0, slices, name='samples')
centroids = tf.concat(0, centroids, name='centroids')
return centroids, samples
then define 2 function assign & update (+ euclidian distance) as usual
def assign(data, centroids):
# Explanations here: http://learningtensorflow.com/lesson6/
expanded_vectors = tf.expand_dims(samples, 0)
expanded_centroids = tf.expand_dims(centroids, 1)
# nice trick here: use 'sub' "pairwisely" (thats why we just used "expand")
#
distances = tf.reduce_sum( tf.square(
tf.sub(expanded_vectors, expanded_centroids)), 2)
mins = tf.argmin(distances, 0)
nearest_indices = mins
return nearest_indices
def update(data, nearest_indices, n_clusters):
# Updates the centroid to be the mean of all samples associated with it.
nearest_indices = tf.to_int32(nearest_indices)
partitions = tf.dynamic_partition(samples, nearest_indices, n_clusters)
new_centroids = tf.concat(0, [tf.expand_dims(tf.reduce_mean(partition, 0), 0) for partition in partitions])
return new_centroids
def euclidian_distance(x, y):
sqd = tf.squared_difference(tf.cast(x, "float32"),tf.cast(y, "float32"))
sumsqd = tf.reduce_sum(sqd)
sqrtsumsqd = tf.sqrt(sumsqd)
return sqrtsumsqd
Then define the TensorFlow model to run:
import tensorflow as tf
import numpy as np
nclusters = 3
nsamplespercluster = 500
nfeatures = 2
embiggenfactor = 70
seed = 700
np.random.seed(seed)
ocentroids, samples = create_samples(nclusters, nsamplespercluster, nfeatures, embiggenfactor, seed)
X = tf.placeholder("float32", [nclusters*nsamplespercluster, 2])
# chosing random sample points as initial centroids.
centroids = tf.Variable([samples[i] for i in np.random.choice(range(nclusters*nsamplespercluster), nclusters)])#, [10.,10.]])
mean=0.0, stddev=150, dtype=tf.float32, seed=seed))
nearest_indices = assign(X, centroids)
new_centroids = update(X, nearest_indices, nclusters)
# Our error is defined as the square of the differences between centroid
error = euclidian_distance(centroids, new_centroids)
# The Gradient Descent Optimizer
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(error)
model = tf.initialize_all_variables()
with tf.Session() as session:
data = session.run(samples)
session.run(model)
epsilon = 0.08
err = float("inf")
count = 0
while err > epsilon:
_, err = session.run([train_op, error], {X: data})
print(err)
clustering = session.run(nearest_indices)
centers = session.run(centroids)
count += 1
# Plot each 100 iteration to see progress
if (count % 100) == 0:
print(count)
plt.figure()
plt.scatter(data[:,0], data[:,1], c=clustering)
plt.scatter(centers[:,0], centers[:,1], s=300, c="orange", marker="x", linewidth=5)
print("%d iterations" % count)
plt.figure()
plt.scatter(data[:,0], data[:,1], c=clustering)
plt.scatter(centers[:,0], centers[:,1], s=300, c="orange", marker="x", linewidth=5)
This is actually working (running) but the result is decieving:
After around 1600 iteration the result is so bad. I dont even figure out how some points can be "lost" (= clustered as a color they are so away from). To my mind kmeans can converge rlly fast on such case. Here it is not even converging to a good solution. Is it due to Gradient Descent? (don't see how could it be but...)
Thanks for advices!
pltrdy

Categories

Resources