I am trying to convert code from Keras to TensorFlow. I don't have much experience with the Keras API; I am a TensorFlow user. Here is the Keras code:
rawmeta = layers.Input(shape=(1,), dtype="string")
emb = elmolayer()(rawmeta)
d1 = layers.Dense(256, activation='relu')(emb)
yhat = layers.Dense(31, activation='softmax', name = "output_node")(d1)
model = Model(inputs=[rawmeta], outputs=yhat)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
where elmolayer is defined as follows:
class elmolayer(Layer):

    def __init__(self, **kwargs):
        self.dimensions = 1024
        self.trainable = True
        super(elmolayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.elmo = hub.Module('https://tfhub.dev/google/elmo/2', trainable=self.trainable,
                               name="{}_module".format(self.name))
        self.trainable_weights += K.tf.trainable_variables(scope="^{}_module/.*".format(self.name))
        super(elmolayer, self).build(input_shape)

    def call(self, x, mask=None):
        result = self.elmo(K.squeeze(K.cast(x, tf.string), axis=1),
                           as_dict=True,
                           signature='default',
                           )['default']
        return result

    def compute_mask(self, inputs, mask=None):
        return K.not_equal(inputs, '--PAD--')

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.dimensions)
My TensorFlow implementation of this code is:
class Base_model(object):

    def __init__(self, elmo_embedding_matrix):
        tf.reset_default_graph()

        # define placeholders
        sentences = tf.placeholder(tf.int32, [None, None], name='sentences')
        y_true = tf.placeholder(tf.int32, [None, None], name='labels')

        self.elmo = tf.get_variable(name="relation_embedding",
                                    shape=[elmo_embedding_matrix.shape[0], elmo_embedding_matrix.shape[1]],
                                    initializer=tf.constant_initializer(np.array(elmo_embedding_matrix)),
                                    trainable=True, dtype=tf.float32)

        embedding_lookup = tf.nn.embedding_lookup(self.elmo, sentences)
        d1 = tf.layers.dense(embedding_lookup, 256, tf.nn.relu)
        y_pred = tf.layers.dense(d1, 31, tf.nn.softmax)

        matches = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
        acc = tf.reduce_mean(tf.cast(matches, tf.float32))

        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_true, logits=y_pred))
        train = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cross_entropy)
My confusion is about the last dense layer. In the Keras model it is:
yhat = layers.Dense(31, activation='softmax', name = "output_node")(d1)
while in the TensorFlow code, if I am using tf.nn.softmax_cross_entropy_with_logits_v2, should I still pass the second dense layer through a softmax, e.g.,
y_pred = tf.layers.dense(d1, 31, tf.nn.softmax)
Because if I apply softmax here, then tf.nn.softmax_cross_entropy_with_logits_v2 will apply softmax again to the logits.
How do I convert that Keras code to TensorFlow?
Posting the comment here in the Answer Section, even though it is present in the Comments Section, for the benefit of the community.
The TensorFlow equivalent of the Keras output layer
yhat = layers.Dense(31, activation='softmax', name = "output_node")(d1)
is
y_logits = tf.layers.dense(d1, 31)  # no activation here: the dense layer must output raw logits
y_pred = tf.nn.softmax(y_logits)    # probabilities, used only for predictions and accuracy
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_true, logits=y_logits))
Note that tf.nn.softmax_cross_entropy_with_logits_v2 applies softmax internally, so the dense layer must not apply it a second time; pass the raw y_logits to the loss and use y_pred only for inference.
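For comparison, the same pattern can be kept inside Keras itself. A minimal sketch, assuming tf.keras 2.x: leave the last Dense layer linear and tell the loss it receives logits.

yhat = layers.Dense(31, name="output_node")(d1)  # no softmax: raw logits
model = Model(inputs=[rawmeta], outputs=yhat)
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              optimizer='adam', metrics=['accuracy'])

Probabilities can then be recovered at inference time with tf.nn.softmax(model(x)) when needed.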
Hope this helps. Happy Learning!
Related
I made this function that incorporates a ResNet into a model. It works well, and I can save it.
My problem is that I can't load it, because loading needs a call function. I am not exactly sure how to turn this into a class. The attempt is at the bottom; some pointers would be helpful.
def build_network():
    inp = Input(shape=(256,256,3))
    resnet = tf.keras.applications.ResNet152V2(
        include_top=False, weights='imagenet', input_tensor=None,
        input_shape=(256,256,3), pooling=None, classes=1000
    )
    # classifier_activation='softmax'
    x = resnet(inp)
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.3)(x)
    x = Dense(9, activation='softmax')(x)
    model = tf.keras.Model(inputs=inp, outputs=x)
    opt = tf.keras.optimizers.SGD(momentum=0.9)
    # optimizer = 'adam',
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])
    model.summary()
    return model
class Resnet(tf.keras.Model):
    def __init__(self, num_classes=9):
        super(Resnet, self).__init__()
        self.block_1 = tf.keras.applications.ResNet152V2(
            include_top=False, weights='imagenet', input_tensor=None,
            input_shape=(256,256,3), pooling=None, classes=1000)
        self.global_pool = layers.GlobalAveragePooling2D()
        self.dropout = Dropout(0.3)
        self.classifier = Dense(num_classes, activation='softmax')

    def call(self, inputs):
        x = self.block_1(inputs)
        x = self.global_pool(x)
        x = self.dropout(x)
        x = self.classifier(x)
        return tf.keras.Model(inputs=inputs, outputs=x)
Using the subclassing API will actually make your model unserializable (see the "Limitations" section in the "What are Symbolic and Imperative APIs in TensorFlow 2.0?" blog post):

Imperative models are also more difficult to inspect, copy, or clone. For example, model.save(), model.get_config(), and clone_model do not work for subclassed models. Likewise, model.summary() only gives you a list of layers (and doesn't provide information on how they're connected, since that's not accessible).

Edit: As of TensorFlow 2.4, it is possible to pass a save_traces argument to model.save to serialize models built using the subclassing API. See https://www.tensorflow.org/guide/keras/save_and_serialize#how_savedmodel_handles_custom_objects.
Here's a simple example of how you can do this:
import tensorflow as tf
from tensorflow.keras.layers import (Dense, Dropout, GlobalAveragePooling2D,
                                     Input)


def build_network():
    inp = Input(shape=(256, 256, 3))
    resnet = tf.keras.applications.ResNet152V2(include_top=False,
                                               weights="imagenet",
                                               input_tensor=None,
                                               input_shape=(256, 256, 3),
                                               pooling=None,
                                               classes=1000)
    # classifier_activation="softmax"
    x = resnet(inp)
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.3)(x)
    x = Dense(9, activation="softmax")(x)
    model = tf.keras.Model(inputs=inp, outputs=x)
    # optimizer = "adam",
    opt = tf.keras.optimizers.SGD(momentum=0.9)
    model.compile(loss="categorical_crossentropy",
                  optimizer=opt,
                  metrics=["accuracy"])
    model.summary()
    return model


if __name__ == "__main__":
    model = build_network()
    model.summary()

    # Save
    model.save("my_model.h5")

    # Load
    loaded_model = tf.keras.models.load_model("my_model.h5")
    loaded_model.summary()
To load the model saved from your build_network function, use tf.keras.models.load_model.
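If you do want to keep the subclassed Resnet, here is a minimal sketch, assuming TF >= 2.4 and assuming call() is changed to return the tensor x rather than a new Model:

model = Resnet(num_classes=9)
_ = model(tf.zeros((1, 256, 256, 3)))           # call once so the variables are created
model.save("resnet_subclassed", save_traces=True)  # SavedModel format
loaded = tf.keras.models.load_model("resnet_subclassed")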
I am using TensorFlow 2.0 and trying to evaluate gradients for backpropagation through a simple feedforward neural network. Here's what my model looks like:
def __init__(self, input_size, output_size):
    inputs = tf.keras.Input(shape=(input_size,))
    hidden_layer1 = tf.keras.layers.Dense(30, activation='relu')(inputs)
    outputs = tf.keras.layers.Dense(output_size)(hidden_layer1)
    self.model = tf.keras.Model(inputs=inputs, outputs=outputs)
    self.optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    self.loss_function = tf.keras.losses.Huber()
The forward pass through this network is fine, but when I use GradientTape to train the model, it is at least 10x slower than PyTorch.
Training function:
def learn_modified_x(self, inputs, targets, actions):
    with tf.GradientTape() as tape:
        predictions = self.model(inputs)
        predictions_for_action = gather_single_along_axis(predictions, actions)
        loss = self.loss_function(targets, predictions_for_action)
    grads = tape.gradient(loss, self.model.trainable_weights)
    self.optimizer.apply_gradients(zip(grads, self.model.trainable_weights))
I tried commenting out lines to find what is actually causing the problem. I found that tape.gradient is a significant contributor.
Any idea?
PyTorch implementation
def __init__(self, input_size, nb_action):
    super(Network, self).__init__()
    self.input_size = input_size
    self.nb_action = nb_action
    self.fc1 = nn.Linear(input_size, 30)
    self.fc2 = nn.Linear(30, nb_action)

def forward(self, state):
    x = F.relu(self.fc1(state))
    q_values = self.fc2(x)
    return q_values

def learn(self, batch_state, batch_next_state, batch_reward, batch_action):
    outputs = self.model(batch_state).gather(1, batch_action.unsqueeze(1)).squeeze(1)
    next_outputs = self.model(batch_next_state).detach().max(1)[0]
    target = self.gamma * next_outputs + batch_reward
    td_loss = F.smooth_l1_loss(outputs, target)
    self.optimizer.zero_grad()
    td_loss.backward(retain_graph=True)  # retain_variables was renamed to retain_graph in modern PyTorch
    self.optimizer.step()
You need to use tf.function to wrap your model's call function:

def __init__(self, ...):
    ...
    self.model.call = tf.function(self.model.call)
    ...
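A fuller sketch of the same idea (my own phrasing of the answer above, reusing the question's gather_single_along_axis helper): wrapping the whole training step in tf.function compiles the tape and gradient computation into a graph instead of running them op by op in eager mode.

@tf.function
def learn_modified_x(self, inputs, targets, actions):
    with tf.GradientTape() as tape:
        predictions = self.model(inputs)
        predictions_for_action = gather_single_along_axis(predictions, actions)
        loss = self.loss_function(targets, predictions_for_action)
    grads = tape.gradient(loss, self.model.trainable_weights)
    self.optimizer.apply_gradients(zip(grads, self.model.trainable_weights))

The first call traces the function; subsequent calls reuse the compiled graph, which is usually where most of the 10x gap closes.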
I have TF 1.0.1 code for a seq2seq model. I am trying to rewrite it using TensorFlow Keras.
The TF 1.0.1 code has the following decoder architecture:
with tf.variable_scope("decoder_scope") as decoder_scope:
    # output projection
    # we need to specify the output projection manually, because sampled softmax
    # needs access to the projection matrix
    output_projection_w_t = tf.get_variable("output_projection_w", [vocabulary_size, state_size], dtype=DTYPE)
    output_projection_w = tf.transpose(output_projection_w_t)
    output_projection_b = tf.get_variable("output_projection_b", [vocabulary_size], dtype=DTYPE)

    decoder_cell = tf.contrib.rnn.LSTMCell(num_units=state_size)
    decoder_cell = DtypeDropoutWrapper(cell=decoder_cell, output_keep_prob=tf_keep_probabiltiy, dtype=DTYPE)
    decoder_cell = contrib_rnn.MultiRNNCell(cells=[decoder_cell] * num_lstm_layers, state_is_tuple=True)

    # define decoder train network
    decoder_outputs_tr, _, _ = dynamic_rnn_decoder(
        cell=decoder_cell,
        decoder_fn=simple_decoder_fn_train(last_encoder_state, name=None),
        inputs=decoder_inputs,
        sequence_length=decoder_sequence_lengths,
        parallel_iterations=None,
        swap_memory=False,
        time_major=False)

    # define decoder inference network
    decoder_scope.reuse_variables()
Here is how the sampled_softmax_loss is calculated:
decoder_forward_outputs = tf.reshape(decoder_outputs_tr, [-1, state_size])
decoder_target_labels = tf.reshape(decoder_labels, [-1, 1])  # decoder_labels is the target sequence of the decoder

sampled_softmax_losses = tf.nn.sampled_softmax_loss(
    weights=output_projection_w_t,
    biases=output_projection_b,
    inputs=decoder_forward_outputs,
    labels=decoder_target_labels,
    num_sampled=500,
    num_classes=vocabulary_size,
    num_true=1,
)

total_loss_op = tf.reduce_mean(sampled_softmax_losses)
And, this is my decoder in Keras:
decoder_inputs = tf.keras.Input(shape=(None,), name='decoder_input')
emb_layer = tf.keras.layers.Embedding(vocabulary_size, state_size)
x_d = emb_layer(decoder_inputs)
d_lstm_layer = tf.keras.layers.LSTM(embed_dim, return_sequences=True)
d_lstm_out = d_lstm_layer(x_d, initial_state=encoder_states)
This is the sampled_softmax_loss function I use for the Keras model:
class SampledSoftmaxLoss(object):

    def __init__(self, model):
        self.model = model
        output_layer = model.layers[-1]
        self.input = output_layer.input
        self.weights = output_layer.weights

    def loss(self, y_true, y_pred, **kwargs):
        loss = tf.nn.sampled_softmax_loss(
            weights=self.weights[0],
            biases=self.weights[1],
            labels=tf.reshape(y_true, [-1, 1]),
            inputs=tf.reshape(d_lstm_out, [-1, state_size]),
            num_sampled=500,
            num_classes=vocabulary_size
        )
        return loss
But it does not work.
Can anyone help me implement the sampled softmax loss function in Keras correctly?
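For reference, one way this is commonly wired up, as a sketch under my own assumptions (vocabulary_size and state_size as defined above; not a verified fix for the code in question): own the projection variables explicitly and close over them in the loss, instead of fishing them out of model.layers[-1].

proj_w = tf.Variable(tf.random.normal([vocabulary_size, state_size]),
                     name="output_projection_w")  # [num_classes, dim]
proj_b = tf.Variable(tf.zeros([vocabulary_size]), name="output_projection_b")

def sampled_loss(y_true, lstm_out):
    # y_true: [batch, time] word ids; lstm_out: [batch, time, state_size]
    return tf.reduce_mean(tf.nn.sampled_softmax_loss(
        weights=proj_w,
        biases=proj_b,
        labels=tf.reshape(y_true, [-1, 1]),
        inputs=tf.reshape(lstm_out, [-1, state_size]),
        num_sampled=500,
        num_classes=vocabulary_size))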
/pytorch/aten/src/ATen/native/cudnn/RNN.cpp:1266: UserWarning: RNN module weights are not part of single contiguous chunk of memory.
This means they need to be compacted at every call, possibly greatly increasing memory usage. To compact weights again call flatten_parameters().
Hello. I am using PyTorch. I am trying to use the DataParallel function in PyTorch, but the model is an LSTM. I am warned to flatten the model's weights again, but I don't know when and where to do it. Can you let me know?
This is my model:
import torch.nn as nn
from torchvision import models


class ConvLstm(nn.Module):
    def __init__(self, latent_dim, model, hidden_size, lstm_layers, bidirectional, n_class):
        super(ConvLstm, self).__init__()
        self.conv_model = Pretrained_conv(latent_dim, model)
        self.Lstm = Lstm(latent_dim, hidden_size, lstm_layers, bidirectional)
        self.output_layer = nn.Sequential(
            nn.Linear(2 * hidden_size if bidirectional else hidden_size, n_class),
            nn.Softmax(dim=-1)
        )

    def forward(self, x):
        batch_size, timesteps, channel_x, h_x, w_x = x.shape
        conv_input = x.view(batch_size * timesteps, channel_x, h_x, w_x)
        conv_output = self.conv_model(conv_input)
        lstm_input = conv_output.view(batch_size, timesteps, -1)
        lstm_output = self.Lstm(lstm_input)
        lstm_output = lstm_output[:, -1, :]
        output = self.output_layer(lstm_output)
        return output


class Pretrained_conv(nn.Module):
    def __init__(self, latent_dim, model):
        super(Pretrained_conv, self).__init__()  # moved out of the if: must run unconditionally
        if model == 'resnet152':
            self.conv_model = models.resnet152(pretrained=True)
            # ====== freeze all of the layers ======
            for param in self.conv_model.parameters():
                param.requires_grad = False
            # ====== change the last FC layer to an output of the size we need; this layer stays unfrozen ======
            self.conv_model.fc = nn.Linear(self.conv_model.fc.in_features, latent_dim)

    def forward(self, x):
        return self.conv_model(x)


class Lstm(nn.Module):
    def __init__(self, latent_dim, hidden_size, lstm_layers, bidirectional):
        super(Lstm, self).__init__()
        self.Lstm = nn.LSTM(latent_dim, hidden_size=hidden_size,
                            num_layers=lstm_layers, batch_first=True, bidirectional=bidirectional)
        self.hidden_state = None

    def reset_hidden_state(self):
        self.hidden_state = None

    def forward(self, x):
        output, self.hidden_state = self.Lstm(x, self.hidden_state)
        return output
The LSTM is run by executing the following code:
def foward_step(model, images, labels, criterion, mode=''):
    model.module.Lstm.reset_hidden_state()
    if mode == 'test':
        with torch.no_grad():
            output = model(images)
    else:
        output = model(images)
    loss = criterion(output, labels)
    # Accuracy calculation
    predicted_labels = output.detach().argmax(dim=1)
    acc = (predicted_labels == labels).cpu().numpy().sum()
    return loss, acc, predicted_labels.cpu()
This is the main code:
model = nn.DataParallel(model, device_ids=[0,1,2,3]).cuda()
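For what it's worth, the warning's own advice suggests one placement. A sketch (my assumption, not a verified fix): call flatten_parameters() at the top of the Lstm module's forward, so each DataParallel replica re-compacts its weights before the call:

class Lstm(nn.Module):
    def forward(self, x):
        self.Lstm.flatten_parameters()  # compact the weights into one contiguous chunk
        output, self.hidden_state = self.Lstm(x, self.hidden_state)
        return output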
I want to use Keras for a deep feedforward neural network, but without using model.fit. Something like this, but using Keras instead of tf.layers:
h_1 = tf.layers.dense(inputs=inputs, units=self.n_1, activation=tf.nn.leaky_relu, kernel_regularizer=regularizer)
h_2 = tf.layers.dense(inputs=h_1, units=self.n_2, activation=tf.nn.leaky_relu, kernel_regularizer=regularizer)
h_3 = tf.layers.dense(inputs=h_2, units=self.n_3, activation=tf.nn.leaky_relu, kernel_regularizer=regularizer)
h_4 = tf.layers.dense(inputs=h_3, units=self.n_3, activation=tf.nn.leaky_relu, kernel_regularizer=regularizer)
out = tf.layers.dense(inputs=h_4, units=self.a_dim, activation=tf.nn.tanh, kernel_regularizer=regularizer)
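(For reference, that stack maps almost one-to-one onto the built-in keras.layers.Dense. A sketch, assuming the regularizer is a keras.regularizers instance such as l2, and reusing the self.n_* sizes from the code above:

import tensorflow as tf
from tensorflow.keras import layers, regularizers

regularizer = regularizers.l2(1e-4)  # assumption: any kernel regularizer works here
h_1 = layers.Dense(self.n_1, activation=tf.nn.leaky_relu, kernel_regularizer=regularizer)(inputs)
h_2 = layers.Dense(self.n_2, activation=tf.nn.leaky_relu, kernel_regularizer=regularizer)(h_1)
h_3 = layers.Dense(self.n_3, activation=tf.nn.leaky_relu, kernel_regularizer=regularizer)(h_2)
h_4 = layers.Dense(self.n_3, activation=tf.nn.leaky_relu, kernel_regularizer=regularizer)(h_3)
out = layers.Dense(self.a_dim, activation=tf.nn.tanh, kernel_regularizer=regularizer)(h_4))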
This is the class I have built for the Keras neural network:
from keras import backend as K
from keras.layers import Layer
from keras import activations
import tensorflow as tf


class actorLayer(Layer):

    def __init__(self, output_dim, activation=None, **kwargs):
        self.output_dim = output_dim
        self.activation = activations.get(activation)  # None resolves to the linear activation
        super(actorLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # Create a trainable weight variable for this layer.
        self.kernel = self.add_weight(name='kernel',
                                      shape=(input_shape[1], self.output_dim),
                                      initializer='uniform',
                                      trainable=True)
        super(actorLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        return self.activation(K.dot(x, self.kernel))

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim)  # fixed: batch dimension, not the whole shape tuple
and this is how I use it:
x_in = Input(shape=(7,))
x = actorLayer(128,activation='relu')(x_in)
x = actorLayer(64,activation='relu')(x)
x = actorLayer(64,activation='relu')(x)
x = actorLayer(16)(x)
x = actorLayer(2)(x)
I need something like tf.layers where I can use the output directly (out in the code above).
I use model = Model(inputs=x_in, outputs=x) to build my model, but I do not know how to use its output and return out, and I cannot use model.fit.
I have tried out = model(input), but it does not work. I am using this NN for reinforcement learning, so I need to return its output and then optimize its weights.
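One way to do this without model.fit, as a sketch assuming the model is built with tf.keras in TF 2.x eager mode (the loss below is a placeholder objective, not part of the original code):

import tensorflow as tf

model = Model(inputs=x_in, outputs=x)
optimizer = tf.keras.optimizers.Adam(1e-3)

state = tf.random.normal((1, 7))  # hypothetical input batch
with tf.GradientTape() as tape:
    out = model(state)            # forward pass: this is the network's output
    loss = -tf.reduce_mean(out)   # placeholder RL objective for illustration
grads = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(grads, model.trainable_variables))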