I am using a pre-trained AlexNet as shown below. I want to use the model for regression with 6 outputs: X coordinate (range 0-227), Y coordinate (range 0-227), height (range 20-50), width (range 20-50), sin(theta), and cos(theta), where theta ranges from -180 to 180 degrees.
These are the things I changed:
changed the loss function to MSE.
changed the output layer from 1000 units to 6.
changed the last layer from ReLU to a linear activation function.
Now I am not getting proper values for sine and cosine above (they should be in the range -1 to 1); I am getting out-of-bound values. What should I do, and how should I keep a bound on these values? Should I put a bound on the other parameters as well, and how do I incorporate those changes?
What other changes should I make to use this model for regression?
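To make the target encoding concrete, here is my own sketch of how one label vector is built (assuming theta is in degrees as above; x, y, h, w are the ground-truth box values for one sample):
import numpy as np
# one 6-element regression target: [x, y, h, w, sin(theta), cos(theta)]
theta_rad = np.deg2rad(theta)
target = np.array([x, y, h, w, np.sin(theta_rad), np.cos(theta_rad)], dtype=np.float32)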
import tensorflow as tf
import numpy as np
class AlexNet(object):
def __init__(self, x, keep_prob, num_classes, skip_layer,
weights_path = 'DEFAULT'):
# Parse input arguments into class variables
self.X = x
self.NUM_CLASSES = num_classes
self.KEEP_PROB = keep_prob
self.SKIP_LAYER = skip_layer
if weights_path == 'DEFAULT':
self.WEIGHTS_PATH = 'bvlc_alexnet.npy'
else:
self.WEIGHTS_PATH = weights_path
# Call the create function to build the computational graph of AlexNet
self.create()
def create(self):
# 1st Layer: Conv (w ReLu) -> Pool -> Lrn
conv1 = conv(self.X, 11, 11, 96, 4, 4, padding = 'VALID', name = 'conv1')
pool1 = max_pool(conv1, 3, 3, 2, 2, padding = 'VALID', name = 'pool1')
norm1 = lrn(pool1, 2, 2e-05, 0.75, name = 'norm1')
# 2nd Layer: Conv (w ReLu) -> Pool -> Lrn with 2 groups
conv2 = conv(norm1, 5, 5, 256, 1, 1, groups = 2, name = 'conv2')
pool2 = max_pool(conv2, 3, 3, 2, 2, padding = 'VALID', name ='pool2')
norm2 = lrn(pool2, 2, 2e-05, 0.75, name = 'norm2')
# 3rd Layer: Conv (w ReLu)
conv3 = conv(norm2, 3, 3, 384, 1, 1, name = 'conv3')
# 4th Layer: Conv (w ReLu) split into two groups
conv4 = conv(conv3, 3, 3, 384, 1, 1, groups = 2, name = 'conv4')
# 5th Layer: Conv (w ReLu) -> Pool split into two groups
conv5 = conv(conv4, 3, 3, 256, 1, 1, groups = 2, name = 'conv5')
pool5 = max_pool(conv5, 3, 3, 2, 2, padding = 'VALID', name = 'pool5')
# 6th Layer: Flatten -> FC (w ReLu) -> Dropout
flattened = tf.reshape(pool5, [-1, 6*6*256])
fc6 = fc(flattened, 6*6*256, 4096, name='fc6',relu =True)
dropout6 = dropout(fc6, self.KEEP_PROB)
# 7th Layer: FC (ReLU disabled here) -> Dropout (commented out below)
fc7 = fc(dropout6, 4096, 4096, name = 'fc7',relu =False)
# dropout7 = dropout(fc7, self.KEEP_PROB)
# 8th Layer: FC and return unscaled activations (for tf.nn.softmax_cross_entropy_with_logits)
self.fc8 = fc(fc7, 4096, self.NUM_CLASSES, name='fc8',relu = False)
def load_initial_weights(self, session):
"""
As the weights from http://www.cs.toronto.edu/~guerzhoy/tf_alexnet/ come
as a dict of lists (e.g. weights['conv1'] is a list) and not as dict of
dicts (e.g. weights['conv1'] is a dict with keys 'weights' & 'biases') we
need a special load function
"""
# Load the weights into memory
weights_dict = np.load(self.WEIGHTS_PATH, encoding = 'bytes').item()
# Loop over all layer names stored in the weights dict
for op_name in weights_dict:
# Check if the layer is one of the layers that should be reinitialized
if op_name not in self.SKIP_LAYER:
with tf.variable_scope(op_name, reuse = True):
# Loop over list of weights/biases and assign them to their corresponding tf variable
for data in weights_dict[op_name]:
# Biases
if len(data.shape) == 1:
var = tf.get_variable('biases', trainable = False)
session.run(var.assign(data))
# Weights
else:
var = tf.get_variable('weights', trainable = False)
session.run(var.assign(data))
"""
Predefine all necessary layer for the AlexNet
"""
def conv(x, filter_height, filter_width, num_filters, stride_y, stride_x, name,
padding='SAME', groups=1):
"""
Adapted from: https://github.com/ethereon/caffe-tensorflow
"""
# Get number of input channels
input_channels = int(x.get_shape()[-1])
# Create lambda function for the convolution
convolve = lambda i, k: tf.nn.conv2d(i, k,
strides = [1, stride_y, stride_x, 1],
padding = padding)
with tf.variable_scope(name) as scope:
# Create tf variables for the weights and biases of the conv layer
weights = tf.get_variable('weights', shape = [filter_height, filter_width, input_channels/groups, num_filters])
biases = tf.get_variable('biases', shape = [num_filters])
if groups == 1:
conv = convolve(x, weights)
# In the case of multiple groups, split inputs & weights
else:
# Split input and weights and convolve them separately
#input_groups = tf.split(value=x, num_split= groups, split_dim=3)
#input_groups = tf.split(split_dim=3, num_split= groups,value=x)
input_groups = tf.split(axis = 3, num_or_size_splits=groups, value=x)
# weight_groups = tf.split(value =weights, num_split=groups, split_dim=3)
weight_groups = tf.split(axis = 3, num_or_size_splits=groups, value=weights)
output_groups = [convolve(i, k) for i,k in zip(input_groups, weight_groups)]
# Concat the convolved output together again
#conv = tf.concat( values = output_groups,concat_dim = 3)
conv = tf.concat(axis = 3, values = output_groups)
# Add biases
bias = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape().as_list())
# Apply relu function
relu = tf.nn.relu(bias, name = scope.name)
return relu
#def fc(x, num_in, num_out, name, relu = True):
def fc(x, num_in, num_out, name, relu):
with tf.variable_scope(name) as scope:
# Create tf variables for the weights and biases
weights = tf.get_variable('weights', shape=[num_in, num_out], trainable=True)
biases = tf.get_variable('biases', [num_out], trainable=True)
# Matrix multiply weights and inputs and add bias
act = tf.nn.xw_plus_b(x, weights, biases, name=scope.name)
if relu:
# Apply ReLu non linearity
relu = tf.nn.relu(act)
return relu
else:
return act
def max_pool(x, filter_height, filter_width, stride_y, stride_x, name, padding='SAME'):
return tf.nn.max_pool(x, ksize=[1, filter_height, filter_width, 1],
strides = [1, stride_y, stride_x, 1],
padding = padding, name = name)
def lrn(x, radius, alpha, beta, name, bias=1.0):
return tf.nn.local_response_normalization(x, depth_radius = radius, alpha = alpha,
beta = beta, bias = bias, name = name)
def dropout(x, keep_prob):
return tf.nn.dropout(x, keep_prob)
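Side note on load_initial_weights above: NumPy 1.16.3+ defaults to allow_pickle=False, so loading the pickled weights dict may need the flag set explicitly. A minimal sketch, assuming a recent NumPy:
# allow_pickle=True is required for pickled .npy dicts on newer NumPy versions
weights_dict = np.load(self.WEIGHTS_PATH, encoding='bytes', allow_pickle=True).item()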
Now the code for the loss function and optimizer is:
# Op for calculating the loss
with tf.name_scope("cross_ent"):
loss = tf.reduce_mean(tf.squared_difference(score, y))
# Train op
with tf.name_scope("train"):
# Get gradients of all trainable variables
gradients = tf.gradients(loss, var_list)
gradients = list(zip(gradients, var_list))
# Create optimizer and apply gradient descent to the trainable variables
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_op = optimizer.apply_gradients(grads_and_vars=gradients)
Anything I should change in this part?
Or any comments, or anything else I should take care of when changing the model from classification to regression?
I am new to TensorFlow and deep learning.
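For reference, here is a minimal sketch of the kind of output bounding I am asking about, assuming tanh heads for sin/cos and sigmoid-scaled heads for the box parameters (a hypothetical replacement for the fc8 head above, reusing the fc helper; not my trained model):
# Bounded regression head (a sketch): split the 6 outputs and squash each
# group into its known range instead of leaving the last layer unbounded.
raw = fc(fc7, 4096, 6, name='fc8', relu=False)   # unbounded linear outputs
xy = 227.0 * tf.sigmoid(raw[:, 0:2])             # x, y in (0, 227)
hw = 20.0 + 30.0 * tf.sigmoid(raw[:, 2:4])       # h, w in (20, 50)
sincos = tf.tanh(raw[:, 4:6])                    # sin, cos in (-1, 1)
bounded = tf.concat([xy, hw, sincos], axis=1)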
Related
I've developed a custom convolutional layer. I can use it inside a model and train it (model.fit works), but model.predict() yields an error!
I will add some simple code to demonstrate how it is structured.
modelx1 = tf.keras.models.Sequential([tf.keras.Input(shape=(49,)), Dense(1, activation = 'relu')])
class customLayer(tf.keras.layers.Layer):
def __init__(self,n=10):super(customLayer, self).__init__()
def call(self, inputs):
_, Dim0,Dim1, Dim3 = inputs.shape
input_victorized = tf.image.extract_patches(images=inputs, sizes=[-1, 7, 7, 1],
strides=[1, 1, 1, 1],rates=[1, 1, 1, 1], padding='SAME')
input_victorized2 = tf.reshape(input_victorized, [-1,49])
model_output = modelx1(input_victorized2)
out = tf.reshape(model_output,[-1,Dim0,Dim1,Dim3])
return out
The custom layer reshapes the input, then feeds it to 'modelx1' then it reshapes the output.
Here is a simple model where the custom layer is used:
input1 = tf.keras.Input(shape=(28,28,1))
x = Conv2D(filters = 2, kernel_size = 5, activation = 'relu')(input1)
Layeri = customLayer()(x)
xxc = Flatten()(Layeri)
y = Dense(units = 3, activation = 'softmax')(xxc)
model = tf.keras.Model(inputs=input1, outputs=y)
model.summary()
The error appears when I run model.predict:
model.predict(np.ones([100,28,28,1]))
UnimplementedError: Only support ksizes across space.
[[node model_58/custom_layer_9/ExtractImagePatches
(defined at <ipython-input-279-953feb59f882>:7)
]] [Op:__inference_predict_function_14640]
Errors may have originated from an input operation.
Input Source operations connected to node model_58/custom_layer_9/ExtractImagePatches:
In[0] model_58/conv2d_98/Relu (defined at /usr/local/lib/python3.7/dist-packages/keras/backend.py:4867)
I think this should work:
image = tf.expand_dims(image, 0)
extracted_patches = tf.image.extract_patches(images = image,
sizes = [1, int(0.5 * image_height), int(0.5 * image_width), 1],
strides = [1, int(0.5 * image_height), int(0.5 * image_width), 1],
rates = [1, 1, 1, 1],
padding = "SAME")
And then use tf.reshape to extract these patches
patches = tf.reshape(extracted_patches,
[-1,int(0.5*image_height),int(0.5*image_width),3])
I had a similar error a couple of months back; this fixed it!
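For anyone comparing with the custom layer above: the likely culprit there is the sizes argument. tf.image.extract_patches only supports patch sizes across the spatial dimensions, so the batch and depth entries of sizes must both be 1. A sketch of the corrected call inside call(), using the same names as the question's code:
# sizes must be [1, patch_h, patch_w, 1]; a -1 (or anything != 1) in the
# batch or depth slot triggers "Only support ksizes across space".
input_victorized = tf.image.extract_patches(images=inputs,
                                            sizes=[1, 7, 7, 1],
                                            strides=[1, 1, 1, 1],
                                            rates=[1, 1, 1, 1],
                                            padding='SAME')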
How can I convert my older TensorFlow code to the newer version? The CNN, RNN, and CTC parts are not working in the newer version: after I updated TensorFlow, many of the functions stopped working properly and show errors, and some of the functions are no longer in the package.
I don't have any idea how to convert it to the new version of TensorFlow.
from __future__ import absolute_import, division, print_function, unicode_literals
import codecs
import sys
import numpy as np
import tensorflow as tf
from DataLoader import FilePaths
import matplotlib.pyplot as plt
class DecoderType:
BestPath = 0
WordBeamSearch = 1
BeamSearch = 2
class Model:
# Model Constants
batchSize = 10 # 50
imgSize = (800, 64)
maxTextLen = 100
def __init__(self, charList, decoderType=DecoderType.BestPath, mustRestore=False):
self.charList = charList
self.decoderType = decoderType
self.mustRestore = mustRestore
self.snapID = 0
# input image batch
self.inputImgs =tf.compat.v1.placeholder(tf.float32, shape=(None, Model.imgSize[0], Model.imgSize[1]))
# setup CNN, RNN and CTC
self.setupCNN()
self.setupRNN()
self.setupCTC()
# setup optimizer to train NN
self.batchesTrained = 0
self.learningRate = tf.placeholder(tf.float32, shape=[])
self.optimizer = tf.train.RMSPropOptimizer(self.learningRate).minimize(self.loss)
# Initialize TensorFlow
(self.sess, self.saver) = self.setupTF()
self.training_loss_summary = tf.summary.scalar('loss', self.loss)
self.writer = tf.summary.FileWriter(
'./logs', self.sess.graph) # Tensorboard: Create writer
self.merge = tf.summary.merge([self.training_loss_summary]) # Tensorboard: Merge
def setupCNN(self):
""" Create CNN layers and return output of these layers """
cnnIn4d = tf.expand_dims(input=self.inputImgs, axis=3)
# First Layer: Conv (5x5) + Pool (2x2) - Output size: 400 x 32 x 64
with tf.name_scope('Conv_Pool_1'):
kernel = tf.Variable(
tf.random.truncated_normal([5, 5, 1, 64], stddev=0.1))
conv = tf.nn.conv2d(
cnnIn4d, kernel, padding='SAME', strides=(1, 1, 1, 1))
learelu = tf.nn.leaky_relu(conv, alpha=0.01)
pool = tf.nn.max_pool2d(learelu, (1, 2, 2, 1), (1, 2, 2, 1), 'VALID')
# Second Layer: Conv (5x5) + Pool (1x2) - Output size: 400 x 16 x 128
with tf.name_scope('Conv_Pool_2'):
kernel = tf.Variable(tf.truncated_normal(
[5, 5, 64, 128], stddev=0.1))
conv = tf.nn.conv2d(
pool, kernel, padding='SAME', strides=(1, 1, 1, 1))
learelu = tf.nn.leaky_relu(conv, alpha=0.01)
pool = tf.nn.max_pool(learelu, (1, 1, 2, 1), (1, 1, 2, 1), 'VALID')
# Third Layer: Conv (3x3) + Pool (2x2) + Simple Batch Norm - Output size: 200 x 8 x 128
with tf.name_scope('Conv_Pool_BN_3'):
kernel = tf.Variable(tf.truncated_normal(
[3, 3, 128, 128], stddev=0.1))
conv = tf.nn.conv2d(
pool, kernel, padding='SAME', strides=(1, 1, 1, 1))
mean, variance = tf.nn.moments(conv, axes=[0])
batch_norm = tf.nn.batch_normalization(
conv, mean, variance, offset=None, scale=None, variance_epsilon=0.001)
learelu = tf.nn.leaky_relu(batch_norm, alpha=0.01)
pool = tf.nn.max_pool(learelu, (1, 2, 2, 1), (1, 2, 2, 1), 'VALID')
# Fourth Layer: Conv (3x3) - Output size: 200 x 8 x 256
with tf.name_scope('Conv_4'):
kernel = tf.Variable(tf.truncated_normal(
[3, 3, 128, 256], stddev=0.1))
conv = tf.nn.conv2d(
pool, kernel, padding='SAME', strides=(1, 1, 1, 1))
learelu = tf.nn.leaky_relu(conv, alpha=0.01)
# Fifth Layer: Conv (3x3) + Pool(2x2) - Output size: 100 x 4 x 256
with tf.name_scope('Conv_Pool_5'):
kernel = tf.Variable(tf.truncated_normal(
[3, 3, 256, 256], stddev=0.1))
conv = tf.nn.conv2d(
learelu, kernel, padding='SAME', strides=(1, 1, 1, 1))
learelu = tf.nn.leaky_relu(conv, alpha=0.01)
pool = tf.nn.max_pool(learelu, (1, 2, 2, 1), (1, 2, 2, 1), 'VALID')
# Sixth Layer: Conv (3x3) + Pool(1x2) + Simple Batch Norm - Output size: 100 x 2 x 512
with tf.name_scope('Conv_Pool_BN_6'):
kernel = tf.Variable(tf.truncated_normal(
[3, 3, 256, 512], stddev=0.1))
conv = tf.nn.conv2d(
pool, kernel, padding='SAME', strides=(1, 1, 1, 1))
mean, variance = tf.nn.moments(conv, axes=[0])
batch_norm = tf.nn.batch_normalization(
conv, mean, variance, offset=None, scale=None, variance_epsilon=0.001)
learelu = tf.nn.leaky_relu(batch_norm, alpha=0.01)
pool = tf.nn.max_pool(learelu, (1, 1, 2, 1), (1, 1, 2, 1), 'VALID')
# Seventh Layer: Conv (3x3) + Pool (1x2) - Output size: 100 x 1 x 512
with tf.name_scope('Conv_Pool_7'):
kernel = tf.Variable(tf.truncated_normal(
[3, 3, 512, 512], stddev=0.1))
conv = tf.nn.conv2d(
pool, kernel, padding='SAME', strides=(1, 1, 1, 1))
learelu = tf.nn.leaky_relu(conv, alpha=0.01)
pool = tf.nn.max_pool(learelu, (1, 1, 2, 1), (1, 1, 2, 1), 'VALID')
self.cnnOut4d = pool
def setupRNN(self):
""" Create RNN layers and return output of these layers """
# Collapse layer to remove dimension 100 x 1 x 512 --> 100 x 512 on axis=2
rnnIn3d = tf.squeeze(self.cnnOut4d, axis=[2])
# 2 layers of LSTM cell used to build RNN
numHidden = 512
cells = [tf.contrib.rnn.LSTMCell(
num_units=numHidden, state_is_tuple=True, name='basic_lstm_cell') for _ in range(2)]
stacked = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)
# Bi-directional RNN
# BxTxF -> BxTx2H
((forward, backward), _) = tf.nn.bidirectional_dynamic_rnn(
cell_fw=stacked, cell_bw=stacked, inputs=rnnIn3d, dtype=rnnIn3d.dtype)
# BxTxH + BxTxH -> BxTx2H -> BxTx1X2H
concat = tf.expand_dims(tf.concat([forward, backward], 2), 2)
# Project output to chars (including blank): BxTx1x2H -> BxTx1xC -> BxTxC
kernel = tf.Variable(tf.truncated_normal(
[1, 1, numHidden * 2, len(self.charList) + 1], stddev=0.1))
self.rnnOut3d = tf.squeeze(tf.nn.atrous_conv2d(value=concat, filters=kernel, rate=1, padding='SAME'), axis=[2])
def setupCTC(self):
""" Create CTC loss and decoder and return them """
# BxTxC -> TxBxC
self.ctcIn3dTBC = tf.transpose(self.rnnOut3d, [1, 0, 2])
# Ground truth text as sparse tensor
with tf.name_scope('CTC_Loss'):
self.gtTexts = tf.SparseTensor(tf.placeholder(tf.int64, shape=[
None, 2]), tf.placeholder(tf.int32, [None]), tf.placeholder(tf.int64, [2]))
# Calculate loss for batch
self.seqLen = tf.placeholder(tf.int32, [None])
self.loss = tf.reduce_mean(tf.nn.ctc_loss(labels=self.gtTexts, inputs=self.ctcIn3dTBC, sequence_length=self.seqLen,
ctc_merge_repeated=True, ignore_longer_outputs_than_inputs=True))
with tf.name_scope('CTC_Decoder'):
# Decoder: Best path decoding or Word beam search decoding
if self.decoderType == DecoderType.BestPath:
self.decoder = tf.nn.ctc_greedy_decoder(
inputs=self.ctcIn3dTBC, sequence_length=self.seqLen)
elif self.decoderType == DecoderType.BeamSearch:
self.decoder = tf.nn.ctc_beam_search_decoder(inputs=self.ctcIn3dTBC, sequence_length=self.seqLen, beam_width=50, merge_repeated=True)
elif self.decoderType == DecoderType.WordBeamSearch:
# Import compiled word beam search operation (see https://github.com/githubharald/CTCWordBeamSearch)
word_beam_search_module = tf.load_op_library(
'./TFWordBeamSearch.so')
# Prepare: dictionary, characters in dataset, characters forming words
chars = codecs.open(FilePaths.wordCharList.txt, 'r').read()
wordChars = codecs.open(
FilePaths.fnWordCharList, 'r').read()
corpus = codecs.open(FilePaths.corpus.txt, 'r').read()
# # Decoder using the "NGramsForecastAndSample": restrict number of (possible) next words to at most 20 words: O(W) mode of word beam search
# decoder = word_beam_search_module.word_beam_search(tf.nn.softmax(ctcIn3dTBC, dim=2), 25, 'NGramsForecastAndSample', 0.0, corpus.encode('utf8'), chars.encode('utf8'), wordChars.encode('utf8'))
# Decoder using the "Words": only use dictionary, no scoring: O(1) mode of word beam search
self.decoder = word_beam_search_module.word_beam_search(tf.nn.softmax(
self.ctcIn3dTBC, dim=2), 25, 'Words', 0.0, corpus.encode('utf8'), chars.encode('utf8'), wordChars.encode('utf8'))
# Return a CTC operation to compute the loss and CTC operation to decode the RNN output
return self.loss, self.decoder
def setupTF(self):
""" Initialize TensorFlow """
print('Python: ' + sys.version)
print('Tensorflow: ' + tf.__version__)
sess = tf.Session() # Tensorflow session
saver = tf.train.Saver(max_to_keep=3) # Saver saves model to file
modelDir = '../model/'
latestSnapshot = tf.train.latest_checkpoint(modelDir) # Is there a saved model?
# If model must be restored (for inference), there must be a snapshot
if self.mustRestore and not latestSnapshot:
raise Exception('No saved model found in: ' + modelDir)
# Load saved model if available
if latestSnapshot:
print('Init with stored values from ' + latestSnapshot)
saver.restore(sess, latestSnapshot)
else:
print('Init with new values')
sess.run(tf.global_variables_initializer())
return (sess, saver)
def toSpare(self, texts):
""" Convert ground truth texts into sparse tensor for ctc_loss """
indices = []
values = []
shape = [len(texts), 0] # Last entry must be max(labelList[i])
# Go over all texts
for (batchElement, text) in enumerate(texts):
# Convert to string of label (i.e. class-ids)
# print(texts)
# labelStr = []
# for c in texts:
# print(c, '|', end='')
# labelStr.append(self.charList.index(c))
# print(' ')
labelStr = [self.charList.index(c) for c in text]
# Sparse tensor must have size of max. label-string
if len(labelStr) > shape[1]:
shape[1] = len(labelStr)
# Put each label into sparse tensor
for (i, label) in enumerate(labelStr):
indices.append([batchElement, i])
values.append(label)
return (indices, values, shape)
def decoderOutputToText(self, ctcOutput):
""" Extract texts from output of CTC decoder """
# Contains string of labels for each batch element
encodedLabelStrs = [[] for i in range(Model.batchSize)]
# Word beam search: label strings terminated by blank
if self.decoderType == DecoderType.WordBeamSearch:
blank = len(self.charList)
for b in range(Model.batchSize):
for label in ctcOutput[b]:
if label == blank:
break
encodedLabelStrs[b].append(label)
# TF decoders: label strings are contained in sparse tensor
else:
# Ctc returns tuple, first element is SparseTensor
decoded = ctcOutput[0][0]
# Go over all indices and save mapping: batch -> values
idxDict = {b : [] for b in range(Model.batchSize)}
for (idx, idx2d) in enumerate(decoded.indices):
label = decoded.values[idx]
batchElement = idx2d[0] # index according to [b,t]
encodedLabelStrs[batchElement].append(label)
# Map labels to chars for all batch elements
return [str().join([self.charList[c] for c in labelStr]) for labelStr in encodedLabelStrs]
def trainBatch(self, batch, batchNum):
""" Feed a batch into the NN to train it """
sparse = self.toSpare(batch.gtTexts)
rate = 0.01 if self.batchesTrained < 10 else (
0.001 if self.batchesTrained < 2750 else 0.0001)  # step down the learning rate over training
evalList = [self.merge, self.optimizer, self.loss]
feedDict = {self.inputImgs: batch.imgs, self.gtTexts: sparse, self.seqLen: [Model.maxTextLen] * Model.batchSize, self.learningRate: rate}
(loss_summary, _, lossVal) = self.sess.run(evalList, feedDict)
# Tensorboard: Add loss_summary to writer
self.writer.add_summary(loss_summary, batchNum)
self.batchesTrained += 1
return lossVal
def return_rnn_out(self, batch, write_on_csv=False):
"""Only return rnn_out prediction value without decoded"""
numBatchElements = len(batch.imgs)
decoded, rnnOutput = self.sess.run([self.decoder, self.ctcIn3dTBC],
{self.inputImgs: batch.imgs, self.seqLen: [Model.maxTextLen] * numBatchElements})
decoded = rnnOutput
print(decoded.shape)
if write_on_csv:
s = rnnOutput.shape
b = 0
csv = ''
for t in range(s[0]):
for c in range(s[2]):
csv += str(rnnOutput[t, b, c]) + ';'
csv += '\n'
open('mat_0.csv', 'w').write(csv)
return decoded[:,0,:].reshape(100,80)
def inferBatch(self, batch):
""" Feed a batch into the NN to recognize texts """
numBatchElements = len(batch.imgs)
feedDict = {self.inputImgs: batch.imgs, self.seqLen: [Model.maxTextLen] * numBatchElements}
evalRes = self.sess.run([self.decoder, self.ctcIn3dTBC], feedDict)
decoded = evalRes[0]
# # Dump RNN output to .csv file
# decoded, rnnOutput = self.sess.run([self.decoder, self.rnnOutput], {
# self.inputImgs: batch.imgs, self.seqLen: [Model.maxTextLen] * Model.batchSize})
# s = rnnOutput.shape
# b = 0
# csv = ''
# for t in range(s[0]):
# for c in range(s[2]):
# csv += str(rnnOutput[t, b, c]) + ';'
# csv += '\n'
# open('mat_0.csv', 'w').write(csv)
texts = self.decoderOutputToText(decoded)
return texts
def save(self):
""" Save model to file """
self.snapID += 1
self.saver.save(self.sess, r'C:\Users\PycharmProjects\hand\model\snapshot',
global_step=self.snapID)
You can run TF1 code in TF2 by importing tf a bit differently:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
For details on how to migrate your code you should look here: https://www.tensorflow.org/guide/migrate
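Note that tf.contrib was removed entirely in TF 2.x, so the compat shim alone will not cover the tf.contrib.rnn cells above. Equivalent cells live under the compat namespace; a minimal sketch, assuming TF 2.x is installed:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# compat.v1 replacements for the removed tf.contrib.rnn cells:
numHidden = 512
cells = [tf.nn.rnn_cell.LSTMCell(num_units=numHidden, state_is_tuple=True)
         for _ in range(2)]
stacked = tf.nn.rnn_cell.MultiRNNCell(cells, state_is_tuple=True)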
I'm using one convolution layer and one fully connected layer in a CNN, and there are two output nodes. I'm using one input channel and 3 filter channels in the convolutional layer (1D convolution). When I store the final weight matrix of the fully connected layer it has shape (36, 2), whereas a single input has 12 features. Now I want to plot the filter weights attached to the 1st, 2nd, and 3rd channels separately. If I plot the first 12 weights, does that mean they correspond to class 1 of the first channel?
def weight_variable(shape):
initial = tf.truncated_normal(shape, mean=0, stddev=0.1)
return tf.Variable(initial)
def conv1d(input, filter):
return tf.nn.conv1d(input, filter, stride=1, padding='SAME')
x = tf.placeholder(tf.float32, [None, FLAGS.image_width])
y_ = tf.placeholder(tf.float32, [None, 2])
input = tf.reshape(x, [-1, FLAGS.image_width, FLAGS.input_channel])
filter = weight_variable([FLAGS.filter_width, FLAGS.input_channel,
FLAGS.filter_channel])
conv_out = tf.nn.tanh(conv1d(input, filter))
#Fully_Connected_layer
dim = conv_out.get_shape().as_list()
conv_re = tf.reshape(conv_out, (-1, dim[1]*dim[2]))
W_fc = weight_variable([dim[1]*dim[2], 2])
logits = tf.matmul(conv_re, W_fc)
y_prime = tf.nn.softmax(logits)
#Cross_entropy:
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
labels= y_)
loss = tf.reduce_mean(cross_entropy)
optimizer = tf.train.GradientDescentOptimizer(FLAGS.rLearn).minimize(loss)
#Check_predictions:
correct_prediction = tf.equal(tf.argmax(y_prime, axis=1), tf.argmax(y_, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
W = W_fc.eval() # shape (36,2)
W1 = W[0:12,0]
W2 = W[12:24,0]
W3 = W[24:36,0]
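For what it's worth, note the memory layout: the conv output has shape (batch, 12, 3) and tf.reshape flattens it row-major, so position w of channel c lands at flat index w*3 + c. The 12 weights belonging to one channel are therefore strided, not contiguous, and W[0:12, 0] mixes all three channels. A sketch using the W above:
W_ch0 = W[0::3, 0] # channel 0 weights feeding output node 0
W_ch1 = W[1::3, 0] # channel 1
W_ch2 = W[2::3, 0] # channel 2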
[TF 1.8]
I'm trying to build a seq2seq model for a toy chatbot to learn about TensorFlow and deep learning. I was able to train and run the model with sampled softmax and beam search, but when I try to apply tf.contrib.seq2seq.LuongAttention using tf.contrib.seq2seq.AttentionWrapper I get the following error while building the graph:
ValueError: Dimensions must be equal, but are 384 and 256 for 'rnn/while/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/MatMul_2' (op: 'MatMul') with input shapes: [64,384], [256,512].
This is my model:
class ChatBotModel:
def __init__(self, inferring=False, batch_size=1, use_sample_sofmax=True):
"""forward_only: if set, we do not construct the backward pass in the model.
"""
print('Initialize new model')
self.inferring = inferring
self.batch_size = batch_size
self.use_sample_sofmax = use_sample_sofmax
def build_graph(self):
# INPUTS
self.X = tf.placeholder(tf.int32, [None, None])
self.Y = tf.placeholder(tf.int32, [None, None])
self.X_seq_len = tf.placeholder(tf.int32, [None])
self.Y_seq_len = tf.placeholder(tf.int32, [None])
self.gl_step = tf.Variable(
0, dtype=tf.int32, trainable=False, name='global_step')
single_cell = tf.nn.rnn_cell.BasicLSTMCell(128)
keep_prob = tf.cond(tf.convert_to_tensor(self.inferring, tf.bool), lambda: tf.constant(
1.0), lambda: tf.constant(0.8))
single_cell = tf.contrib.rnn.DropoutWrapper(
single_cell, output_keep_prob=keep_prob)
encoder_cell = tf.contrib.rnn.MultiRNNCell([single_cell for _ in range(2)])
# ENCODER
encoder_out, encoder_state = tf.nn.dynamic_rnn(
cell = encoder_cell,
inputs = tf.contrib.layers.embed_sequence(self.X, 10000, 128),
sequence_length = self.X_seq_len,
dtype = tf.float32)
# encoder_state is ((cell0_c, cell0_h), (cell1_c, cell1_h))
# DECODER INPUTS
after_slice = tf.strided_slice(self.Y, [0, 0], [self.batch_size, -1], [1, 1])
decoder_inputs = tf.concat( [tf.fill([self.batch_size, 1], 2), after_slice], 1)
# ATTENTION
attention_mechanism = tf.contrib.seq2seq.LuongAttention(
num_units = 128,
memory = encoder_out,
memory_sequence_length = self.X_seq_len)
# DECODER COMPONENTS
Y_vocab_size = 10000
decoder_cell = tf.contrib.rnn.MultiRNNCell([single_cell for _ in range(2)])
decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
cell = decoder_cell,
attention_mechanism = attention_mechanism,
attention_layer_size=128)
decoder_embedding = tf.Variable(tf.random_uniform([Y_vocab_size, 128], -1.0, 1.0))
projection_layer = CustomDense(Y_vocab_size)
if self.use_sample_sofmax:
softmax_weight = projection_layer.kernel
softmax_biases = projection_layer.bias
if not self.inferring:
# TRAINING DECODER
training_helper = tf.contrib.seq2seq.TrainingHelper(
inputs = tf.nn.embedding_lookup(decoder_embedding, decoder_inputs),
sequence_length = self.Y_seq_len,
time_major = False)
decoder_initial_state = decoder_cell.zero_state(self.batch_size, dtype=tf.float32).clone(
cell_state=encoder_state)
training_decoder = tf.contrib.seq2seq.BasicDecoder(
cell = decoder_cell,
helper = training_helper,
initial_state = decoder_initial_state,
output_layer = projection_layer
)
training_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
decoder = training_decoder,
impute_finished = True,
maximum_iterations = tf.reduce_max(self.Y_seq_len))
training_logits = training_decoder_output.rnn_output
# LOSS
softmax_loss_function = None
if self.use_sample_sofmax:
def sampled_loss(labels, logits):
labels = tf.reshape(labels, [-1, 1])
return tf.nn.sampled_softmax_loss(weights=softmax_weight,
biases=softmax_biases,
labels=labels,
inputs=logits,
num_sampled=64,
num_classes=10000)
softmax_loss_function = sampled_loss
masks = tf.sequence_mask(self.Y_seq_len, tf.reduce_max(self.Y_seq_len), dtype=tf.float32)
self.loss = tf.contrib.seq2seq.sequence_loss(logits = training_logits, targets = self.Y, weights = masks, softmax_loss_function=softmax_loss_function)
# BACKWARD
params = tf.trainable_variables()
gradients = tf.gradients(self.loss, params)
clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
self.train_op = tf.train.AdamOptimizer().apply_gradients(zip(clipped_gradients, params), global_step=self.gl_step)
else:
encoder_states = []
for i in range(2):
if isinstance(encoder_state[i],tf.contrib.rnn.LSTMStateTuple):
encoder_state_c = tf.contrib.seq2seq.tile_batch(encoder_state[i].c, multiplier=2)
encoder_state_h = tf.contrib.seq2seq.tile_batch(encoder_state[i].h, multiplier=2)
encoder_state = tf.contrib.rnn.LSTMStateTuple(c=encoder_state_c, h=encoder_state_h)
encoder_states.append(encoder_state)
encoder_states = tuple(encoder_states)
predicting_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
cell = decoder_cell,
embedding = decoder_embedding,
start_tokens = tf.tile(tf.constant([2], dtype=tf.int32), [self.batch_size]),
end_token = 3,
initial_state = decoder_initial_state,
beam_width = 2,
output_layer = projection_layer,
length_penalty_weight = 0.0)
predicting_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
decoder = predicting_decoder,
impute_finished = False,
maximum_iterations = 4 * tf.reduce_max(self.Y_seq_len))
self.predicting_logits = predicting_decoder_output.predicted_ids
Tracing back a few lines of log and I saw that the error occurs here:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/rnn_cell_impl.py in call(self, inputs, state)
636
637 gate_inputs = math_ops.matmul(
--> 638 array_ops.concat([inputs, h], 1), self._kernel)
639 gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)
I have checked the 'h' tensor of the LSTM cell and it has shape [batch_size, 128], so my guess is that the attention output from the previous decoding step is concatenated with the current decoder input, making 'inputs' have shape [batch_size, 256]; this is then concatenated with the 'h' tensor to form a [batch_size, 384] tensor, causing the error.
My question is: isn't the attention output supposed to be concatenated with the next decoder input, or am I misunderstanding something? And how do I fix this error?
You probably already found the answer, but for people (like me) who also encounter this error: focus on the second shape. It specifies [256, 512]. Now open up the code in rnn_cell_impl.py and go to the line where the concat op takes place. You will notice that the kernel shape is the one being reported as out of sync with your decoder input (which has num_units + attention_layer_size as its 1st dimension, the 0th being your batch_size).
Basically, you are using the same cell you created for the encoder in the decoder as well (it's a 2-layer LSTM with 128 units, right?), hence the kernel shape shows up as [256, 512]. To fix this, between those two lines, add:
Y_vocab_size = 10000
## create a fresh base rnn cell for each decoder layer, instead of reusing
## the encoder's single_cell (its kernel is already built for 128-d inputs)
decoder_cell = tf.contrib.rnn.MultiRNNCell(
    [tf.nn.rnn_cell.BasicLSTMCell(128) for _ in range(2)])
Now if you visualize the code at the same line that gave you the error, you will see [64, 384] and [384, 512] (which is a legit matmul op and should fix your error). Of course, whatever dropout etc. you want to add, feel free to wrap around these new decoder cells as well.
Suppose I am trying to connect the output of a pooling layer to a dense layer. In order to do this, I need to flatten the pooled tensor. Consider the layers below:
def conv_layer(input, in_channels, out_channels, name="conv"):
w = tf.get_variable("W", initializer=tf.truncated_normal([3, 3, in_channels, out_channels], stddev=0.1))
b = tf.get_variable("B", initializer=tf.constant(0.1, shape=[out_channels]))
conv = tf.nn.conv2d(input, w, strides=[1,1,1,1], padding="SAME")
act = tf.nn.relu(conv + b)
return act
def pool_layer(input, name="pool"):
pool = tf.nn.max_pool(input, ksize=[1,2,2,1], strides=[1,2,2,1], padding="SAME")
return pool
def dense_layer(input, size_in, size_out, name="dense"):
w = tf.get_variable("W", initializer=tf.truncated_normal([size_in, size_out], stddev=0.1))
b = tf.get_variable("B", initializer=tf.constant(0.1, shape=[size_out]))
act = tf.nn.relu(tf.matmul(input, w) + b)
return act
I am using them to create a network:
def cnn_model(x):
x_image = tf.reshape(x, [-1, nseries, present_window, 1])
conv1 = conv_layer(x_image, 1, 32, "conv1")
pool1 = pool_layer(conv1, "pool1")
conv2 = conv_layer(pool1, 32, 64, "conv2")
pool2 = pool_layer(conv2, "pool2")
nflat = 17*15*64 # hard-coded
flat = tf.reshape(pool2, [-1, nflat])
yhat = dense_layer(flat, nflat, future_window, "dense1")
return yhat
As you can see, I am hard-coding the variable nflat. How can I avoid this?
If it's a tensor, pool.get_shape() should work in Keras or TensorFlow.
This will return the size of each dimension (a TensorShape you can index into), so you need to choose from it; in your case it's probably the 2nd.
If input is actually your raw input (without any other layer), why are you max-pooling? Aren't you looking for dropout?
Indeed, you will run into a problem if your batch size is variable, since there's no way of telling the model the size of the reshape.
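A minimal sketch of avoiding the hard-coded nflat, assuming the spatial dimensions are static and only the batch dimension is None:
dims = pool2.get_shape().as_list()     # [None, h, w, c]
nflat = dims[1] * dims[2] * dims[3]    # static spatial dims; batch stays dynamic
flat = tf.reshape(pool2, [-1, nflat])
# or, equivalently, let TensorFlow do the bookkeeping:
flat = tf.layers.flatten(pool2)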