Make predictions using a tensorflow graph from a keras model - python

I have a model trained using Keras with TensorFlow as my backend, but now I need to turn my model into a TensorFlow graph for a certain application. I attempted to do this and make predictions to ensure that it is working correctly, but when comparing to the results gathered from model.predict() I get very different values. For instance:
from keras.models import load_model
import tensorflow as tf
import numpy as np

model = load_model('model_file.h5')

x_placeholder = tf.placeholder(tf.float32, shape=(None, 7214, 1))
y = model(x_placeholder)
x = np.ones((1, 7214, 1))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print("Predictions from:\ntf graph: " + str(sess.run(y, feed_dict={x_placeholder: x})))
    print("keras predict: " + str(model.predict(x)))
returns:
Predictions from:
tf graph: [[-0.1015993 0.07432419 0.0592984 ]]
keras predict: [[ 0.39339241 0.57949686 -3.67846966]]
The values from keras predict are correct, but the tf graph results are not.
If it helps to know the final intended application: I am creating a Jacobian matrix with the tf.gradients() function, but it currently does not return the correct results when compared with Theano's jacobian function, which gives the correct Jacobian. Here is my TensorFlow Jacobian code:
x = tf.placeholder(tf.float32, shape=(None,7214,1))
y = tf.reshape(model(x)[0],[-1])
y_list = tf.unstack(y)
jacobian_list = [tf.gradients(y_, x)[0] for y_ in y_list]
jacobian = tf.stack(jacobian_list)
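(Aside on the mismatch above: in standalone Keras on the TF1 backend, variable values live per session, so evaluating the model in a brand-new tf.Session after running tf.global_variables_initializer() runs the graph with freshly re-initialized weights rather than the loaded ones. A minimal like-for-like check, sketched here by reusing the Keras backend session, would look like this:)

import numpy as np
from keras import backend as K
from keras.models import load_model

model = load_model('model_file.h5')
x = np.ones((1, 7214, 1))

# Reuse the session Keras already populated with the loaded weights;
# do not run tf.global_variables_initializer() here, or the weights
# are replaced with fresh random values.
sess = K.get_session()
print("tf graph:      ", sess.run(model.output, feed_dict={model.input: x}))
print("keras predict: ", model.predict(x))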
EDIT: Model code
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, InputLayer, Flatten
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
# activation function used following every layer except for the output layers
activation = 'relu'
# model weight initializer
initializer = 'he_normal'
# shape of input data that is fed into the input layer
input_shape = (None,7214,1)
# number of filters used in the convolutional layers
num_filters = [4,16]
# length of the filters in the convolutional layers
filter_length = 8
# length of the maxpooling window
pool_length = 4
# number of nodes in each of the hidden fully connected layers
num_hidden_nodes = [256,128]
# number of samples fed into model at once during training
batch_size = 64
# maximum number of iterations for model training
max_epochs = 30
# initial learning rate for optimization algorithm
lr = 0.0007
# exponential decay rate for the 1st moment estimates for optimization algorithm
beta_1 = 0.9
# exponential decay rate for the 2nd moment estimates for optimization algorithm
beta_2 = 0.999
# a small constant for numerical stability for optimization algorithm
optimizer_epsilon = 1e-08
model = Sequential([
    InputLayer(batch_input_shape=input_shape),
    Conv1D(kernel_initializer=initializer, activation=activation, padding="same",
           filters=num_filters[0], kernel_size=filter_length),
    Conv1D(kernel_initializer=initializer, activation=activation, padding="same",
           filters=num_filters[1], kernel_size=filter_length),
    MaxPooling1D(pool_size=pool_length),
    Flatten(),
    Dense(units=num_hidden_nodes[0], kernel_initializer=initializer, activation=activation),
    Dense(units=num_hidden_nodes[1], kernel_initializer=initializer, activation=activation),
    Dense(units=3, activation="linear", input_dim=num_hidden_nodes[1]),
])
# compile model
loss_function = 'mean_squared_error'
early_stopping_min_delta = 0.0001
early_stopping_patience = 4
reduce_lr_factor = 0.5
reduce_lr_epsilon = 0.0009
reduce_lr_patience = 2
reduce_lr_min = 0.00008

optimizer = Adam(lr=lr, beta_1=beta_1, beta_2=beta_2, epsilon=optimizer_epsilon, decay=0.0)

early_stopping = EarlyStopping(monitor='val_loss', min_delta=early_stopping_min_delta,
                               patience=early_stopping_patience, verbose=2, mode='min')

reduce_lr = ReduceLROnPlateau(monitor='loss', factor=reduce_lr_factor, epsilon=reduce_lr_epsilon,
                              patience=reduce_lr_patience, min_lr=reduce_lr_min, mode='min', verbose=2)

model.compile(optimizer=optimizer, loss=loss_function)
model.fit(train_x, train_y, validation_data=(cv_x, cv_y),
          epochs=max_epochs, batch_size=batch_size, verbose=2,
          callbacks=[reduce_lr, early_stopping])
model.save('model_file.h5')

@frankyjuang linked me to
https://github.com/amir-abdi/keras_to_tensorflow
and, combining this with code from
https://github.com/metaflow-ai/blog/blob/master/tf-freeze/load.py
and
https://github.com/tensorflow/tensorflow/issues/675
I have found a solution both to predicting using a tf graph and to creating the Jacobian function:
import tensorflow as tf
import numpy as np

# Create function to convert saved keras model to tensorflow graph
def convert_to_pb(weight_file, input_fld='', output_fld=''):
    import os
    import os.path as osp
    from tensorflow.python.framework import graph_util
    from tensorflow.python.framework import graph_io
    from keras.models import load_model
    from keras import backend as K

    # weight_file is a .h5 keras model file
    output_node_names_of_input_network = ["pred0"]
    output_node_names_of_final_network = 'output_node'

    # change filename to a .pb tensorflow file
    output_graph_name = weight_file[:-2] + 'pb'
    weight_file_path = osp.join(input_fld, weight_file)

    net_model = load_model(weight_file_path)

    num_output = len(output_node_names_of_input_network)
    pred = [None] * num_output
    pred_node_names = [None] * num_output
    for i in range(num_output):
        pred_node_names[i] = output_node_names_of_final_network + str(i)
        pred[i] = tf.identity(net_model.output[i], name=pred_node_names[i])

    sess = K.get_session()
    constant_graph = graph_util.convert_variables_to_constants(sess, sess.graph.as_graph_def(), pred_node_names)
    graph_io.write_graph(constant_graph, output_fld, output_graph_name, as_text=False)
    print('saved the constant graph (ready for inference) at: ', osp.join(output_fld, output_graph_name))

    return output_fld + output_graph_name
Call:
tf_model_path = convert_to_pb('model_file.h5','/model_dir/','/model_dir/')
Create function to load the tf model as a graph:
def load_graph(frozen_graph_filename):
    # We load the protobuf file from the disk and parse it to retrieve the
    # unserialized graph_def
    with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    # Then, we can use again a convenient built-in function to import a graph_def into the
    # current default Graph
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(
            graph_def,
            input_map=None,
            return_elements=None,
            name="prefix",
            op_dict=None,
            producer_op_list=None
        )

    input_name = graph.get_operations()[0].name + ':0'
    output_name = graph.get_operations()[-1].name + ':0'

    return graph, input_name, output_name
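Taking the first and last operation as the input and output is convenient but not guaranteed for every frozen graph; a quick sanity check (a sketch, using a hypothetical path) is to print a few op names from the imported graph and confirm they are the placeholder and the prediction node:

graph, input_name, output_name = load_graph('/model_dir/model_file.pb')  # hypothetical path
print(input_name, output_name)
for op in graph.get_operations()[:3] + graph.get_operations()[-3:]:
    print(op.name)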
Create a function to make model predictions using the tf graph:
def predict(model_path, input_data):
    # load tf graph
    tf_model, tf_input, tf_output = load_graph(model_path)

    # Create tensors for model input and output
    x = tf_model.get_tensor_by_name(tf_input)
    y = tf_model.get_tensor_by_name(tf_output)

    # Number of model outputs
    num_outputs = y.shape.as_list()[0]
    predictions = np.zeros((input_data.shape[0], num_outputs))
    for i in range(input_data.shape[0]):
        with tf.Session(graph=tf_model) as sess:
            y_out = sess.run(y, feed_dict={x: input_data[i:i+1]})
            predictions[i] = y_out

    return predictions
Make predictions:
tf_predictions = predict(tf_model_path,test_data)
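One possible refinement, sketched under the same assumptions as predict() above: a tf.Session is fairly expensive to create, so it can be opened once and reused across the per-sample loop instead of being rebuilt on every iteration. The helper name below is hypothetical.

def predict_one_session(model_path, input_data):
    # Hypothetical variant of predict(): same logic, but the Session is
    # created once and reused for every sample.
    tf_model, tf_input, tf_output = load_graph(model_path)
    x = tf_model.get_tensor_by_name(tf_input)
    y = tf_model.get_tensor_by_name(tf_output)
    num_outputs = y.shape.as_list()[0]
    predictions = np.zeros((input_data.shape[0], num_outputs))
    with tf.Session(graph=tf_model) as sess:
        for i in range(input_data.shape[0]):
            predictions[i] = sess.run(y, feed_dict={x: input_data[i:i+1]})
    return predictions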
Jacobian function:
def compute_jacobian(model_path, input_data):
    tf_model, tf_input, tf_output = load_graph(model_path)

    x = tf_model.get_tensor_by_name(tf_input)
    y = tf_model.get_tensor_by_name(tf_output)
    y_list = tf.unstack(y)
    num_outputs = y.shape.as_list()[0]

    jacobian = np.zeros((num_outputs, input_data.shape[0], input_data.shape[1]))
    for i in range(input_data.shape[0]):
        with tf.Session(graph=tf_model) as sess:
            y_out = sess.run([tf.gradients(y_, x)[0] for y_ in y_list], feed_dict={x: input_data[i:i+1]})
            jac_temp = np.asarray(y_out)
            jacobian[:, i:i+1, :] = jac_temp[:, :, :, 0]

    return jacobian
Compute Jacobian Matrix:
jacobians = compute_jacobian(tf_model_path,test_data)
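Similarly, the gradient ops can be built once up front instead of inside the loop (each tf.gradients call otherwise adds new nodes to the graph on every iteration), and a single Session can be reused. A sketch with the same inputs and outputs as compute_jacobian(); the function name is hypothetical:

def compute_jacobian_v2(model_path, input_data):
    # Hypothetical variant of compute_jacobian(): gradient ops are built once
    # and one Session is reused, so the graph does not keep growing.
    tf_model, tf_input, tf_output = load_graph(model_path)
    x = tf_model.get_tensor_by_name(tf_input)
    y = tf_model.get_tensor_by_name(tf_output)
    with tf_model.as_default():
        grad_ops = [tf.gradients(y_, x)[0] for y_ in tf.unstack(y)]
    num_outputs = y.shape.as_list()[0]
    jacobian = np.zeros((num_outputs, input_data.shape[0], input_data.shape[1]))
    with tf.Session(graph=tf_model) as sess:
        for i in range(input_data.shape[0]):
            grads = np.asarray(sess.run(grad_ops, feed_dict={x: input_data[i:i+1]}))
            jacobian[:, i, :] = grads[:, 0, :, 0]
    return jacobian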

Related

keras layer with constrained weights raises error during the init of the model

I am training an actor-critic model that has a constrained layer in the actor network. The constraint forces the weights to be diagonal. Below is a minimal reproducible example:
import tensorflow.compat.v1 as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input, Conv1D, Concatenate, BatchNormalization, Reshape
from tensorflow.keras.constraints import Constraint
from tensorflow.python.keras.utils.vis_utils import plot_model
from tensorflow.keras.optimizers import Adam
from tensorflow.python.keras.backend import set_session
from tensorflow.python.keras import backend as K
import numpy as np
import random
from collections import deque

tf.disable_v2_behavior()

# For more repetitive results
np.random.seed(1)
random.seed(1)

class DiagonalWeight(Constraint):
    """Constrains the weights to be diagonal."""
    def __call__(self, w):
        N = K.int_shape(w)[-1]
        m = K.eye(N)
        return w * m

state_shape = (10, 3)

class AC():
    def __init__(self, sess, LRA, LRC):
        self.sess = sess  # session
        self.LRA = LRA    # learning rate for actor
        self.LRC = LRC    # learning rate for critic
        self.graph = tf.get_default_graph()
        set_session(self.sess)

        # training actor and target actor
        self.actor, self.input_actor = self.create_actor()
        plot_model(self.actor, to_file='Ac_architecture.png', show_shapes=True, show_layer_names=True)
        self.target_actor, _ = self.create_actor()
        ### initialize the weights of the target with the weights of training actor
        self.target_actor.set_weights(self.actor.get_weights())

        # training critic and target critic
        self.critic, self.critic_state_input, self.critic_action_input = self.create_critic()
        self.target_critic, _, _ = self.create_critic()
        plot_model(self.critic, to_file='Cr_architecture.png', show_shapes=True, show_layer_names=True)
        ### initialize the weights of the target with the weights of training critic
        self.target_critic.set_weights(self.critic.get_weights())

        ######################## Actor/Critic Grads ########################################################
        self.actor_critic_grad = tf.placeholder(tf.float32, [None, state_shape[0], state_shape[0]+2])
        actor_weights = self.actor.trainable_weights
        self.actor_grads = tf.gradients(self.actor.output, actor_weights, -self.actor_critic_grad)
        grads = zip(self.actor_grads, actor_weights)
        self.optimize = tf.train.AdamOptimizer(self.LRA).apply_gradients(grads)

        self.critic_grads = tf.gradients(self.critic.output, self.critic_action_input)

        # Initialize for later gradient calculations
        self.sess.run(tf.global_variables_initializer())
    #######################################################################################################################

    def create_actor(self):
        actor_input = Input(shape=state_shape, name='state_input')
        h3 = Conv1D(128, 3, padding='same', activation='relu', name='h3')(actor_input)
        h3 = BatchNormalization(name='h3_BN')(h3)
        matrix = Conv1D(state_shape[0], 3, padding='same', activation='relu', name='matrix')(h3)
        vect0 = Conv1D(1, 3, padding='same', activation='relu', name='vect0')(h3)
        vect0_resh = Reshape((1, state_shape[0]))(vect0)
        vect1 = Dense(state_shape[0], activation='relu', name='vect1',
                      use_bias=False, kernel_constraint=DiagonalWeight())(vect0_resh)
        vect1 = Reshape((state_shape[0], 1))(vect1)
        actor_output = Concatenate(axis=-1)([vect0, vect1, matrix])

        model = Model(actor_input, actor_output)
        adam = Adam(lr=self.LRA)
        model.compile(loss="mse", optimizer=adam)
        return model, actor_input

    def create_critic(self):
        state_input = Input(shape=state_shape, name='state_input')
        action_input = Input(shape=(state_shape[0], state_shape[0]+2), name='action_input')
        critic_input = Concatenate(axis=-1)([state_input, action_input])
        h3 = Conv1D(128, 3, padding='same', name='h3')(critic_input)
        h3 = BatchNormalization(name='h3_BN')(h3)
        Q = Conv1D(state_shape[0]+2, 3, padding='same', name='Q')(h3)

        model = Model([state_input, action_input], Q)
        adam = Adam(lr=self.LRC)
        model.compile(loss="mse", optimizer=adam)
        return model, state_input, action_input

sess = tf.compat.v1.Session()
K.set_session(sess)
agent = AC(sess, 0.01, 0.001)
When I remove the kernel constraint from the Dense layer, everything works fine, but when I add it, I get one of the following three errors:
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder_19' with dtype float and shape [?,10,12]
[[node Placeholder_19 (defined at AC.py:59) ]]
or
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'state_input' with dtype float and shape [?,10,3]
[[node state_input (defined at AC.py:73) ]]
or
tensorflow.python.framework.errors_impl.FailedPreconditionError: Error while reading resource variable beta1_power from Container: localhost. This could mean that the variable was uninitialized. Not found: Resource localhost/beta1_power/class tensorflow::Var does not exist.
[[node Adam/update_vect1/kernel/ResourceApplyAdam/ReadVariableOp (defined at AC.py:64) ]]
The kernel in the dense layer is a square matrix, so applying the diagonal constraint should pose no problem.
For anyone interested in the solution:
In the class DiagonalWeight, I replaced the line m = K.eye(N) with m = tf.eye(N).
I don't know exactly why this works, but I guess it is related to the session: TensorFlow needs to store the operation for later use during training.
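For reference, the constraint with that one-line change applied (same class and imports as in the question):

class DiagonalWeight(Constraint):
    """Constrains the weights to be diagonal."""
    def __call__(self, w):
        N = K.int_shape(w)[-1]
        m = tf.eye(N)   # build the mask with a tf op instead of K.eye
        return w * m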

skopt's gp_minimize() function raises ValueError: array must not contain infs or NaNs

I am currently using the skopt (scikit-optimize) package for hyperparameter tuning of a neural network (I am trying to minimize -1 * accuracy). It seems to run fine (and successfully prints to the console) for several iterations before it raises ValueError: array must not contain infs or NaNs.
What are some possible causes of this? My data does not contain infs or NaNs and neither do my search parameter ranges. The neural network code is quite long, so for brevity, I will paste the relevant sections:
Imports:
import pandas as pd
import numpy as np
from skopt import gp_minimize
from skopt.utils import use_named_args
from skopt.space import Real, Categorical, Integer
from tensorflow.python.framework import ops
from sklearn.model_selection import train_test_split
import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Dropout, MaxPooling1D, Flatten
from keras import backend as K
Creation of search parameters:
dim_num_filters_L1 = Integer(low=1, high=50, name='num_filters_L1')
#dim_kernel_size_L1 = Integer(low=1, high=70, name='kernel_size_L1')
dim_activation_L1 = Categorical(categories=['relu', 'linear', 'softmax'], name='activation_L1')
dim_num_filters_L2 = Integer(low=1, high=50, name='num_filters_L2')
#dim_kernel_size_L2 = Integer(low=1, high=70, name='kernel_size_L2')
dim_activation_L2 = Categorical(categories=['relu', 'linear', 'softmax'], name='activation_L2')
dim_num_dense_nodes = Integer(low=1, high=28, name='num_dense_nodes')
dim_activation_L3 = Categorical(categories=['relu', 'linear', 'softmax'], name='activation_L3')
dim_dropout_rate = Real(low = 0, high = 0.5, name = 'dropout_rate')
dim_learning_rate = Real(low=1e-4, high=1e-2, name='learning_rate')
dimensions = [dim_num_filters_L1,
              #dim_kernel_size_L1,
              dim_activation_L1,
              dim_num_filters_L2,
              #dim_kernel_size_L2,
              dim_activation_L2,
              dim_num_dense_nodes,
              dim_activation_L3,
              dim_dropout_rate,
              dim_learning_rate,
              ]
Function that creates all models that will be tested:
def create_model(num_filters_L1, #kernel_size_L1,
                 activation_L1,
                 num_filters_L2, #kernel_size_L2,
                 activation_L2,
                 num_dense_nodes, activation_L3,
                 dropout_rate,
                 learning_rate):

    input_shape = (X_train.shape[1], 1)
    model = Sequential()
    model.add(Conv1D(num_filters_L1, kernel_size=40, activation=activation_L1, input_shape=input_shape))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(num_filters_L2, kernel_size=20, activation=activation_L2))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(num_dense_nodes, activation=activation_L3))
    model.add(Dropout(dropout_rate))
    model.add(Dense(y_train.shape[1], activation='linear'))
    adam = tensorflow.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=adam, loss='mean_squared_error', metrics=['accuracy'])
    return model
Define fitness function:
@use_named_args(dimensions=dimensions)
def fitness(num_filters_L1, #kernel_size_L1,
            activation_L1,
            num_filters_L2, #kernel_size_L2,
            activation_L2,
            num_dense_nodes, activation_L3,
            dropout_rate,
            learning_rate):

    model = create_model(num_filters_L1, #kernel_size_L1,
                         activation_L1,
                         num_filters_L2, #kernel_size_L2,
                         activation_L2,
                         num_dense_nodes, activation_L3,
                         dropout_rate,
                         learning_rate)

    history_opt = model.fit(x=X_train,
                            y=y_train,
                            validation_data=(X_val, y_val),
                            shuffle=True,
                            verbose=2,
                            epochs=10
                            )

    # return the test accuracy after training.
    accuracy_opt = model.evaluate(X_test, y_test)[1]

    # Print the classification accuracy:
    print("Experimental Model Accuracy: {0:.2%}".format(accuracy_opt))

    # Delete the Keras model with these hyper-parameters from memory:
    del model

    # Clear the Keras session, otherwise it will keep adding new models to the same
    # TensorFlow graph each time we create a model with a different set of hyper-parameters.
    K.clear_session()
    ops.reset_default_graph()

    # the optimizer aims for the lowest score, so return negative accuracy:
    return -accuracy_opt  # or sum(RMSE)?
Run hyperparameter search:
gp_result = gp_minimize(func=fitness,
                        dimensions=dimensions)

print("best accuracy was " + str(round(gp_result.fun * -100, 2)) + "%.")
Your activation function is likely what fails to converge during one of the random acquisition-function calls. I encountered this problem and removed 'relu' from the search space.
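Concretely, that means dropping 'relu' from the Categorical dimensions; a sketch reusing the variable names from the question:

dim_activation_L1 = Categorical(categories=['linear', 'softmax'], name='activation_L1')
dim_activation_L2 = Categorical(categories=['linear', 'softmax'], name='activation_L2')
dim_activation_L3 = Categorical(categories=['linear', 'softmax'], name='activation_L3')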

Why is my neural network's cross entropy on the handwritten-digit test set reasonable, while its accuracy against the labels is always about 10%?

I run my code in Spyder; the test-set cross entropy looks correct, but the accuracy on the test set is always very low. This is my code (I use MNIST). Any advice on how I can improve the performance?
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
from tensorflow.contrib.layers import fully_connected
from tensorflow.examples.tutorials.mnist import input_data

x = tf.placeholder(dtype=tf.float32, shape=[None, 784])
y = tf.placeholder(dtype=tf.float32, shape=[None, 10])
test_x = tf.placeholder(dtype=tf.float32, shape=[None, 784])
test_y = tf.placeholder(dtype=tf.float32, shape=[None, 10])

mnist = input_data.read_data_sets("/home/xuenzhu/mnist_data", one_hot=True)

hidden1 = fully_connected(x, 100, activation_fn=tf.nn.relu, weights_initializer=tf.random_normal_initializer())
hidden2 = fully_connected(hidden1, 100, activation_fn=tf.nn.relu, weights_initializer=tf.random_normal_initializer())
outputs = fully_connected(hidden2, 10, activation_fn=tf.nn.relu, weights_initializer=tf.random_normal_initializer())

loss = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=outputs)
reduce_mean_loss = tf.reduce_mean(loss)

equal_result = tf.equal(tf.argmax(outputs, 1), tf.argmax(y, 1))
cast_result = tf.cast(equal_result, dtype=tf.float32)
accuracy = tf.reduce_mean(cast_result)

train_op = tf.train.AdamOptimizer(0.001).minimize(reduce_mean_loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels}))
    for i in range(10000):
        xs, ys = mnist.train.next_batch(128)
        sess.run(train_op, feed_dict={x: xs, y: ys})
        if i % 1000 == 0:
            print(sess.run(equal_result, feed_dict={x: mnist.test.images, y: mnist.test.labels}))
            print(sess.run(reduce_mean_loss, feed_dict={x: mnist.test.images, y: mnist.test.labels}))
    print(sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels}))
There is no use in applying a ReLU activation right before softmax_cross_entropy_with_logits. Change the activation function in the last fully connected layer to None and you'll get good accuracy.
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected

x = tf.placeholder(dtype=tf.float32, shape=[None, 784])
y = tf.placeholder(dtype=tf.float32, shape=[None, 10])
test_x = tf.placeholder(dtype=tf.float32, shape=[None, 784])
test_y = tf.placeholder(dtype=tf.float32, shape=[None, 10])

mnist = input_data.read_data_sets("/home/xuenzhu/mnist_data", one_hot=True)

hidden1 = fully_connected(x, 100, activation_fn=tf.nn.relu, weights_initializer=tf.random_normal_initializer())
hidden2 = fully_connected(hidden1, 100, activation_fn=tf.nn.relu, weights_initializer=tf.random_normal_initializer())

# No activation on the output layer: softmax_cross_entropy_with_logits expects raw logits
outputs = fully_connected(hidden2, 10, activation_fn=None, weights_initializer=tf.random_normal_initializer())

loss = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=outputs)
reduce_mean_loss = tf.reduce_mean(loss)

equal_result = tf.equal(tf.argmax(outputs, 1), tf.argmax(y, 1))
cast_result = tf.cast(equal_result, dtype=tf.float32)
accuracy = tf.reduce_mean(cast_result)

train_op = tf.train.AdamOptimizer(0.001).minimize(reduce_mean_loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels}))
    for i in range(10000):
        xs, ys = mnist.train.next_batch(128)
        sess.run(train_op, feed_dict={x: xs, y: ys})
        if i % 1000 == 0:
            print(sess.run(equal_result, feed_dict={x: mnist.test.images, y: mnist.test.labels}))
            print(sess.run(reduce_mean_loss, feed_dict={x: mnist.test.images, y: mnist.test.labels}))
    print(sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels}))
Try changing this
for i in range(10000): #Try increasing this value to >10k. Try 100k or higher
You should be able to see an increase in accuracy thereafter.
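To see why the ReLU on the output layer hurts, here is a small toy illustration (plain NumPy, not part of the original code): softmax_cross_entropy_with_logits applies softmax internally, and ReLU clamps every negative logit to zero, so most classes end up with indistinguishable scores.

import numpy as np

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

logits = np.array([2.3, -1.7, 0.4, -0.8])   # raw, unbounded scores
relu_logits = np.maximum(logits, 0.0)        # what a ReLU output layer would produce

print(softmax(logits))       # a distinct probability for every class
print(softmax(relu_logits))  # all negative-score classes collapse to the same probability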

How to use an autoencoder to visualize dimensionality reduction? (Python | TensorFlow)

I'm trying to adapt Aymeric Damien's code to visualize the dimensionality reduction performed by an autoencoder implemented in TensorFlow. All of the examples I have seen work on the mnist digits dataset but I wanted to use this method to visualize the iris dataset in 2 dimensions as a toy example so I can figure out how to tweak it for my real-world datasets.
My question is: How can one get the sample-specific 2 dimensional embeddings to visualize?
For example, the iris dataset has 150 samples with 4 attributes. I added 4 noise attributes to get a total of 8 attributes. The encoding/decoding follows: [8, 4, 2, 4, 8] but I'm not sure how to extract an array of shape (150, 2) to visualize the embeddings. I haven't found any tutorials on how to visualize the dimensionality reduction using TensorFlow.
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
# Set random seeds
np.random.seed(0)
tf.set_random_seed(0)
# Load data
iris = load_iris()
# Original Iris : (150,4)
X_iris = iris.data
# Iris with noise : (150,8)
X_iris_with_noise = np.concatenate([X_iris, np.random.random(size=X_iris.shape)], axis=1).astype(np.float32)
y_iris = iris.target
# PCA
pca_xy = PCA(n_components=2).fit_transform(X_iris_with_noise)
with plt.style.context("seaborn-white"):
    fig, ax = plt.subplots()
    ax.scatter(pca_xy[:, 0], pca_xy[:, 1], c=y_iris, cmap=plt.cm.Set2)
    ax.set_title("PCA | Iris with noise")
# Training Parameters
learning_rate = 0.01
num_steps = 1000
batch_size = 10
display_step = 250
examples_to_show = 10
# Network Parameters
num_hidden_1 = 4 # 1st layer num features
num_hidden_2 = 2 # 2nd layer num features (the latent dim)
num_input = 8 # Iris data input
# tf Graph input
X = tf.placeholder(tf.float32, [None, num_input], name="input")
weights = {
    'encoder_h1': tf.Variable(tf.random_normal([num_input, num_hidden_1]), dtype=tf.float32, name="encoder_h1"),
    'encoder_h2': tf.Variable(tf.random_normal([num_hidden_1, num_hidden_2]), dtype=tf.float32, name="encoder_h2"),
    'decoder_h1': tf.Variable(tf.random_normal([num_hidden_2, num_hidden_1]), dtype=tf.float32, name="decoder_h1"),
    'decoder_h2': tf.Variable(tf.random_normal([num_hidden_1, num_input]), dtype=tf.float32, name="decoder_h2"),
}
biases = {
    'encoder_b1': tf.Variable(tf.random_normal([num_hidden_1]), dtype=tf.float32, name="encoder_b1"),
    'encoder_b2': tf.Variable(tf.random_normal([num_hidden_2]), dtype=tf.float32, name="encoder_b2"),
    'decoder_b1': tf.Variable(tf.random_normal([num_hidden_1]), dtype=tf.float32, name="decoder_b1"),
    'decoder_b2': tf.Variable(tf.random_normal([num_input]), dtype=tf.float32, name="decoder_b2"),
}
# Building the encoder
def encoder(x):
    # Encoder Hidden layer with sigmoid activation #1
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['encoder_h1']),
                                   biases['encoder_b1']))
    # Encoder Hidden layer with sigmoid activation #2
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['encoder_h2']),
                                   biases['encoder_b2']))
    return layer_2

# Building the decoder
def decoder(x):
    # Decoder Hidden layer with sigmoid activation #1
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['decoder_h1']),
                                   biases['decoder_b1']))
    # Decoder Hidden layer with sigmoid activation #2
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['decoder_h2']),
                                   biases['decoder_b2']))
    return layer_2
# Construct model
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)
# Prediction
y_pred = decoder_op
# Targets (Labels) are the input data.
y_true = X
# Define loss and optimizer, minimize the squared error
loss = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(loss)
# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()
# Start Training
# Start a new TF session
with tf.Session() as sess:
    # Run the initializer
    sess.run(init)

    # Training
    for i in range(1, num_steps + 1):
        # Prepare Data: get the next batch of Iris data
        idx_train = np.random.RandomState(i).choice(np.arange(X_iris_with_noise.shape[0]), size=batch_size)
        batch_x = X_iris_with_noise[idx_train, :]

        # Run optimization op (backprop) and cost op (to get loss value)
        _, l = sess.run([optimizer, loss], feed_dict={X: batch_x})

        # Display logs per step
        if i % display_step == 0 or i == 1:
            print('Step %i: Minibatch Loss: %f' % (i, l))
Your embedding is accessible with h = encoder(X). Then, for each batch, you can get the value as follows:
_, l, embedding = sess.run([optimizer, loss, h], feed_dict={X: batch_x})
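For the iris example that means evaluating the encoder on all 150 noisy samples; a sketch reusing the variable names from the question (run while the session is still open):

h = encoder(X)   # 2-D embedding op; reuses the trained weights
embedding_xy = sess.run(h, feed_dict={X: X_iris_with_noise})   # shape (150, 2)

with plt.style.context("seaborn-white"):
    fig, ax = plt.subplots()
    ax.scatter(embedding_xy[:, 0], embedding_xy[:, 1], c=y_iris, cmap=plt.cm.Set2)
    ax.set_title("Autoencoder embedding | Iris with noise")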
There is an even nicer solution with TensorBoard using Embeddings Visualization (https://www.tensorflow.org/programmers_guide/embedding):
from tensorflow.contrib.tensorboard.plugins import projector
config = projector.ProjectorConfig()
embedding = config.embeddings.add()
embedding.tensor_name = h.name
# Use the same LOG_DIR where you stored your checkpoint.
summary_writer = tf.summary.FileWriter(LOG_DIR)
projector.visualize_embeddings(summary_writer, config)

Why do I get a different result each time I use my tensorflow model?

I have saved a trained model based on recurrent neural networks. When I run the following function lstm_vector_predict(), it returns a different value every time even though it is loading the same model. Does TensorFlow use some random number generation when predicting values?
import get_list_of_values_to_input
import tensorflow as tf
import tensorflow.contrib.learn as tflearn
import tensorflow.contrib.layers as tflayers
from tensorflow.contrib.learn.python.learn import learn_runner
import tensorflow.contrib.metrics as metrics
import tensorflow.contrib.rnn as rnn
import numpy as np
from backend.common.numpy_array_to_numpy_array_of_arrays import get_numpy_arrays_from_numpy_matrix

def lstm_vector_predict(model_name='sample_model_vector.meta', number_of_tickers=2, batch_size=20,
                        number_of_points=100, start_time=1489462200):
    tf.reset_default_graph()
    inputs = number_of_tickers
    hidden = 100
    output = number_of_tickers
    current_time = start_time

    X = tf.placeholder(tf.float32, [None, batch_size, inputs])

    # This is low level tensor flow stuff used for preparing output of data generation
    basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=hidden, activation=tf.nn.relu)
    rnn_output, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)
    stacked_rnn_output = tf.reshape(rnn_output, [-1, hidden])
    stacked_outputs = tf.layers.dense(stacked_rnn_output, output)
    outputs = tf.reshape(stacked_outputs, [-1, batch_size, output])

    # We get the saver ready
    saver = tf.train.import_meta_graph(model_name)
    init = tf.global_variables_initializer()

    # Later, launch the model, use the saver to restore variables from disk, and
    # do some work with the model.
    return_values = []
    with tf.Session() as sess:
        # Restore variables from disk.
        saver.restore(sess, tf.train.latest_checkpoint('./'))
        print("Model restored.")
        # Check the values of the variables
        sess.run(init)
        for i in range(number_of_points):
            last_values = get_list_of_values_to_input()
            print("Generating point", i)
            #x_generators = last_values[-batch_size:]
            x_generators = last_values[-batch_size:].reshape(-1, batch_size, number_of_tickers)
            y_forecast = sess.run(outputs, feed_dict={X: x_generators})
            return_values.append(y_forecast[-1][-1])
            current_time += 300

    return return_values
You will see different results because of the stochastic nature of the LSTM model, and because it is hard to fix the random seed for LSTM models to get 100% reproducible results.
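If you want to at least reduce the run-to-run variation, you can try pinning the seeds before the graph is built; a minimal TF1-style sketch (full determinism is still not guaranteed, especially on GPU):

import numpy as np
import tensorflow as tf

np.random.seed(42)        # NumPy-side randomness (e.g. input preparation)
tf.set_random_seed(42)    # graph-level seed; set before any ops are created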
