I am trying out the Dataset API for my input pipeline, shown in the TensorFlow documentation, and I use almost the same code:
tr_data = Dataset.from_tensor_slices((train_images, train_labels))
tr_data = tr_data.map(input_parser, NUM_CORES, output_buffer_size=2000)
tr_data = tr_data.batch(BATCH_SIZE)
tr_data = tr_data.repeat(EPOCHS)

iterator = tr_data.make_one_shot_iterator()
next_example, next_label = iterator.get_next()

# Script throws error here
loss = model_function(next_example, next_label)

with tf.Session(...) as sess:
    sess.run(tf.global_variables_initializer())
    while True:
        try:
            train_loss = sess.run(loss)
        except tf.errors.OutOfRangeError:
            print("End of training dataset.")
            break
This should be faster since it avoids using slow feed_dicts. But I can't make it work with my model, which is a simplified LeNet architecture. The problem is the tf.layers.dense in my model_function(), which expects a known input shape (I guess because it has to know the number of weights beforehand). But next_example and next_label only get their shape by being run in the session; before they are evaluated, their shape is simply undefined.
Declaring the model_function() throws this error:
ValueError: The last dimension of the inputs to Dense should be
defined. Found None.
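For reference, a minimal repro of the constraint with hypothetical placeholders (not my pipeline):
x = tf.placeholder(tf.float32)               # fully unknown shape
# tf.layers.dense(x, 10)                     # raises the ValueError above
y = tf.placeholder(tf.float32, [None, 128])  # last dimension known
h = tf.layers.dense(y, 10)                   # builds fine: a 128x10 kernel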
Right now, I don't know if I am using this Dataset API in the intended way or if there is a workaround.
Thanks in advance!
Edit 1:
Below is my model; it throws the error at the first dense layer:
def conv_relu(input, kernel_shape):
    # Create variable named "weights".
    weights = tf.get_variable("weights", kernel_shape,
        initializer=tf.random_normal_initializer())
    # Create variable named "biases".
    biases = tf.get_variable("biases", kernel_shape[3],
        initializer=tf.constant_initializer(0.0))
    conv = tf.nn.conv2d(input, weights,
        strides=[1, 1, 1, 1], padding='VALID')
    return tf.nn.relu(conv + biases)

def fully(input, output_dim):
    assert len(input.get_shape()) == 2, 'Wrong input shape, need flattened tensor as input'
    input_dim = input.get_shape()[1]
    weight = tf.get_variable("weight", [input_dim, output_dim],
        initializer=tf.random_normal_initializer())
    bias = tf.get_variable('bias', [output_dim],
        initializer=tf.random_normal_initializer())
    return tf.nn.bias_add(tf.matmul(input, weight), bias)

def simple_model(x):
    with tf.variable_scope('conv1'):
        conv1 = conv_relu(x, [3, 3, 1, 10])
        conv1 = tf.nn.max_pool(conv1, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')
    with tf.variable_scope('conv2'):
        conv2 = conv_relu(conv1, [3, 3, 10, 10])
        conv2 = tf.nn.max_pool(conv2, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')
    with tf.variable_scope('conv3'):
        conv3 = conv_relu(conv2, [3, 3, 10, 10])
        conv3 = tf.nn.max_pool(conv3, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')
    flat = tf.contrib.layers.flatten(conv3)
    with tf.variable_scope('fully1'):
        fully1 = tf.layers.dense(flat, 1000)
        fully1 = tf.nn.relu(fully1)
    with tf.variable_scope('fully2'):
        fully2 = tf.layers.dense(fully1, 100)
        fully2 = tf.nn.relu(fully2)
    with tf.variable_scope('output'):
        output = tf.layers.dense(fully2, 4)
    return output
Edit 2:
Here you can see the printed tensors. Notice that next_example does not have a shape:
next_example: Tensor("IteratorGetNext:0", dtype=float32)
next_label: Tensor("IteratorGetNext:1", shape=(?, 4), dtype=float32)
I found the answer myself.
Following this thread, the easy fix is to just set the shape with tf.Tensor.set_shape if you know your image sizes beforehand:
def input_parser(img_path, label):
    # read the img from file
    img_file = tf.read_file(img_path)
    img_decoded = tf.image.decode_image(img_file, channels=1)
    img_decoded = tf.image.convert_image_dtype(img_decoded, dtype=tf.float32)
    img_decoded.set_shape([90, 160, 1])  # This line was missing
    return img_decoded, label
It would have been nice if the TensorFlow documentation included this line.
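As an aside: tf.image.decode_image cannot infer a static shape because it also handles multi-frame GIFs. If the image size is fixed anyway, reshaping works too; a sketch, assuming the same 90x160 grayscale inputs:
def input_parser(img_path, label):
    img_file = tf.read_file(img_path)
    # decode_png returns a (?, ?, 1) tensor instead of a fully unknown shape
    img_decoded = tf.image.decode_png(img_file, channels=1)
    img_decoded = tf.image.convert_image_dtype(img_decoded, dtype=tf.float32)
    # tf.reshape fixes the static shape and fails loudly on a size mismatch
    return tf.reshape(img_decoded, [90, 160, 1]), label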
I want to make a model like the picture below (simplified). So, practically, I want the weights with the same names to always have the same values during training. What I did is the code below:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

example_train_features = np.arange(12000).reshape(1000, 12)
example_labels = np.random.randint(2, size=1000)  # these data are just for illustration purposes

train_ds = tf.data.Dataset.from_tensor_slices(
    (example_train_features, example_labels)).shuffle(buffer_size=1000).batch(32)

dense1 = layers.Dense(1, activation="relu")     # input shape: 4
dense2 = layers.Dense(2, activation="relu")     # input shape: 1
dense3 = layers.Dense(1, activation="sigmoid")  # input shape: 6

feature_input = keras.Input(shape=(12,), name="features")
nodes_list = []
for i in range(3):
    first_lvl_input = feature_input[i :: 4]  ######## marked line
    out1 = dense1(first_lvl_input)
    out2 = dense2(out1)
    nodes_list.append(out2)
joined = layers.concatenate(nodes_list)
final_output = dense3(joined)

model = keras.Model(inputs=feature_input, outputs=final_output, name="extrema_model")
# compile_and_fit and val_ds are helpers/data from my larger script
compile_and_fit(model, train_ds, val_ds, patience=4)
model.compile(loss=tf.keras.losses.BinaryCrossentropy(),
              optimizer=tf.keras.optimizers.RMSprop(),
              metrics=keras.metrics.BinaryAccuracy())
history = model.fit(train_ds, epochs=10, validation_data=val_ds)
But when I try to run this code I get this error:
MklConcatOp : Dimensions of inputs should match: shape[0][0]= 71 vs. shape[18][0] = 70
[[node extrema_model/concatenate_2/concat (defined at <ipython-input-373-5efb41d312df>:398) ]] [Op:__inference_train_function_15338]
(Please don't pay attention to the numbers; they come from my real code.) This is because it gets the whole data, including the labels, as input; but shouldn't Keras only feed the features themselves? Anyway, if I write the marked line as below:
first_lvl_input = feature_input[i :12: 4]
it doesn't give me the above error anymore. But, then I get another error which I know why happens but I don't know how to resolve it.
InvalidArgumentError: Incompatible shapes: [4,1] vs. [32,1]
[[node gradient_tape/binary_crossentropy/logistic_loss/mul/BroadcastGradientArgs
(defined at <ipython-input-1-b82546367b3c>:398) ]] [Op:__inference_train_function_6098]
This is because Keras is again feeding the whole batch array, whereas the Keras documentation says you shouldn't specify the batch dimension because the program infers it itself, so I expected Keras to feed the data sample by sample for my code to work. I appreciate any ideas on how to resolve this, or on how to write code for what I want. Thanks.
You can wrap the dense layers in a TimeDistributed wrapper and reshape your data to have three dimensions, (1000, 3, 4) (batch, sequence, feature). Then at each of the 3 time steps (which replace your for loop) the four features will be multiplied with the same weights.
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

example_train_features = np.arange(12000).reshape(1000, 3, 4)
example_labels = np.random.randint(2, size=1000)  # these data are just for illustration purposes

train_ds = tf.data.Dataset.from_tensor_slices(
    (example_train_features, example_labels)).shuffle(buffer_size=1000).batch(32)

dense1 = layers.TimeDistributed(layers.Dense(1, activation="relu"))  # input shape: 4
dense2 = layers.TimeDistributed(layers.Dense(2, activation="relu"))  # input shape: 1
dense3 = layers.Dense(1, activation="sigmoid")                       # input shape: 6

feature_input = keras.Input(shape=(3, 4), name="features")
out1 = dense1(feature_input)
out2 = dense2(out1)
z = layers.Flatten()(out2)
final_output = dense3(z)

model = keras.Model(inputs=feature_input, outputs=final_output, name="extrema_model")
model.compile(loss=tf.keras.losses.BinaryCrossentropy(),
              optimizer=tf.keras.optimizers.RMSprop(),
              metrics=keras.metrics.BinaryAccuracy())
history = model.fit(train_ds, epochs=10)
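A quick sanity check that the weights really are shared (a sketch; run it after the model is built):
# TimeDistributed wraps a single inner Dense layer, so all 3 time steps
# reuse the same kernel and bias
kernel, bias = dense1.layer.get_weights()
print(kernel.shape)  # (4, 1): one weight set, applied at every step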
I am trying to design a GAN using tensorflow.keras models and layers classes. I made a discriminator that takes in a list of 2 pictures and outputs a sigmoid-activated Dense similarity score:
prediction = Dense(1, activation = "sigmoid")(Flatten()(conv4))
model = Model(inputs = [firstImage, secondImage], outputs = prediction)
Then a generator that takes in a random one-dimensional vector and returns a picture out of it:
generated = Conv2D(3, kernel_size = (4, 4), padding = "same",
kernel_initializer = kernelInit, activation = "sigmoid")(conv5) # output shape (256, 256, 3)
model = Model(inputs = noise, outputs = generated)
I made a custom generator using a keras.ImageDataGenerator.flow_from_directory() to load in pictures:
def loadRealImages(batch):
    for gen in pixGen.flow_from_directory(picturesPath, target_size=(256, 256),
                                          batch_size=batch, class_mode="binary"):
        yield gen
I didn't have any trouble compiling either of these two, but when I try to link them together into an adversarial model with this code:
inNoise = Input(shape = (generatorInNoise,))
fake = generator(inNoise) # get one fake
real = np.array(next(loadRealImages(1))[0], dtype = np.float32) # get one real image
discriminator.trainable = False # lock discriminator weights
prediction = discriminator([real, fake]) # check similarity
adversarial = Model(inputs = inNoise, outputs = [fake, prediction]) # set adversarial model
I get this error on the last line:
tensorflow.python.framework.errors_impl.InvalidArgumentError: Cannot convert a Tensor of dtype resource to a NumPy array.
I ascertained the shape of inNoise, fake and prediction:
<class 'tensorflow.python.framework.ops.Tensor'> (None, 16) Tensor("input_4:0", shape=(None, 16), dtype=float32)
<class 'tensorflow.python.framework.ops.Tensor'> (None, 256, 256, 3) Tensor("model_1/Identity:0", shape=(None, 256, 256, 3), dtype=float32)
<class 'tensorflow.python.framework.ops.Tensor'> (1, 1) Tensor("dense_2/Identity:0", shape=(1, 1), dtype=float32)
But I still can't figure out what is raising the error, and looking it up on Google didn't really give me any pointers either. Can anyone help with this?
At the core, the issue here is that you're trying to make a numpy array a part of the computation graph. This can lead to undefined behaviour depending on how you use it. Some minor changes to your code can help:
inNoise = Input(shape = (generatorInNoise,))
fake = generator(inNoise) # get one fake
real = Input((real_image_shape)) # get one real image
discriminator.trainable = False # lock discriminator weights
prediction = discriminator([real, fake]) # check similarity
adversarial = Model(inputs = [inNoise, real], outputs = [fake, prediction]) # set adversarial model
As you can see, the real image needs to be provided as an input to the model, not derived as a part of it.
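A training step could then look like the sketch below; the optimizer, the losses, and the loss weights are assumptions rather than anything from the question, and numpy is imported as np:
adversarial.compile(optimizer="adam",
                    loss=["mae", "binary_crossentropy"],  # assumed losses
                    loss_weights=[0.0, 1.0])  # only the similarity score drives training
noise = np.random.normal(size=(1, generatorInNoise))
real_batch = next(loadRealImages(1))[0]   # one real image, as in the question
targets = [real_batch, np.ones((1, 1))]   # dummy target for the fake output
adversarial.train_on_batch([noise, real_batch], targets)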
Hi, I need to change the first convolution of a model from rgb/resnet_v1_50/conv1/weights:0 (float32_ref 7x7x3x64) to rgb/resnet_v1_50/conv1/weights:0 (float32_ref 7x7x4x64), basically growing the number of input channels from 3 to 4 to accept 4-channel images while keeping the pretrained weights elsewhere (only the additional channel is initialized randomly).
Do you have an idea of how to do that in TensorFlow 1.x? (I'm more of a PyTorch guy...)
In PyTorch I do:
net = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
new_conv1 = nn.Conv2d(4, 64, kernel_size=7, stride=2, padding=3, bias=False)
conv1 = net.conv1
with torch.no_grad():
    new_conv1.weight[:, :3, :, :] = conv1.weight
    new_conv1.bias = conv1.bias
net.conv1 = new_conv1
Here is how the model is created in TensorFlow:
def single_stream(self, images, modality, is_training, reuse=False):
    with tf.variable_scope(modality, reuse=reuse):
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            _, end_points = resnet_v1.resnet_v1_50(
                images, self.no_classes, is_training=is_training, reuse=reuse)

    # last bottleneck before logits
    net = end_points[modality + '/resnet_v1_50/block4']

    if 'autoencoder' in self.mode:
        return net

    with tf.variable_scope(modality + '/resnet_v1_50', reuse=reuse):
        bottleneck = slim.conv2d(net, self.hidden_repr_size,
            [7, 7], padding='VALID', activation_fn=tf.nn.relu, scope='f_repr')
        net = slim.conv2d(bottleneck, self.no_classes,
            [1, 1], activation_fn=None, scope='_logits_')

    if ('train_hallucination' in self.mode or 'test_disc' in self.mode or 'train_eccv' in self.mode):
        return net, bottleneck

    return net
In build_model, with the command self.images = tf.placeholder(tf.float32, [None, 224, 224, 4], modality + '_images') I am able to effectively change the 3 to a 4: rgb/resnet_v1_50/conv1/weights:0 (float32_ref 7x7x4x64) [12544, bytes: 50176]. But the problem is now with the checkpoint!
Thanks a lot for your help!
As you do with PyTorch, you can do the same in Keras, which is now a module of TF2.
I'm going to show you one possible way to do it:
import numpy

net_conv1 = model.layers[2]  # first 2D convolutional layer, from model.layers or model.summary()

# your new set of weights must have the same dimensions as the layer's weights
print('weights shape: ', numpy.shape(net_conv1.weights))
print(net_conv1.weights[0].shape)
print(net_conv1.weights[1].shape)

# new weights
osh_0 = net_conv1.weights[0].shape.as_list()
osh_1 = net_conv1.weights[1].shape.as_list()
print(osh_0, osh_1)
new_conv1_w_0 = numpy.random.rand(*osh_0)
new_conv1_w_1 = numpy.random.rand(*osh_1)

# update the weights
net_conv1.set_weights([new_conv1_w_0, new_conv1_w_1])

# check the result
net_conv1.get_weights()

# no reassignment needed: net_conv1 is model.layers[2], so the model is already updated in place
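To actually keep the pretrained RGB weights and only randomize the extra channel, as in your PyTorch snippet, you can graft the old kernel into a rebuilt model. A sketch, where old_model is the pretrained 3-channel net and new_model is the same architecture rebuilt with a 4-channel input (both names are hypothetical):
old_weights = old_model.layers[2].get_weights()  # [kernel (7, 7, 3, 64), maybe a bias]
new_weights = new_model.layers[2].get_weights()  # [kernel (7, 7, 4, 64), randomly initialized]
new_weights[0][:, :, :3, :] = old_weights[0]     # copy the pretrained RGB kernel
if len(old_weights) > 1:
    new_weights[1] = old_weights[1]              # carry the bias over unchanged
new_model.layers[2].set_weights(new_weights)     # the 4th channel keeps its random init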
Check the layers section of the Keras docs. Hope it will be helpful.
I am implementing an OCR with Keras, TensorFlow backend.
I want to use keras.backend.ctc_decode implementation.
I have a model class :
import keras

def ctc_lambda_func(args):
    y_pred, y_true, input_x_width, input_y_width = args
    # the 2 is critical here since the first couple outputs of the RNN
    # tend to be garbage:
    # y_pred = y_pred[:, 2:, :]
    return keras.backend.ctc_batch_cost(y_true, y_pred, input_x_width, input_y_width)

class ModelOcropy(keras.Model):
    def __init__(self, alphabet: str):
        self.img_height = 48
        self.lstm_size = 100
        self.alphabet_size = len(alphabet)
        # check backend input shape (channel first/last)
        if keras.backend.image_data_format() == "channels_first":
            input_shape = (1, None, self.img_height)
        else:
            input_shape = (None, self.img_height, 1)
        # data input
        input_x = keras.layers.Input(input_shape, name='x')
        # training inputs
        input_y = keras.layers.Input((None,), name='y')
        input_x_widths = keras.layers.Input([1], name='x_widths')
        input_y_widths = keras.layers.Input([1], name='y_widths')
        # network
        flattened_input_x = keras.layers.Reshape((-1, self.img_height))(input_x)
        bidirectional_lstm = keras.layers.Bidirectional(
            keras.layers.LSTM(self.lstm_size, return_sequences=True, name='lstm'),
            name='bidirectional_lstm'
        )(flattened_input_x)
        dense = keras.layers.Dense(self.alphabet_size, activation='relu')(bidirectional_lstm)
        y_pred = keras.layers.Softmax(name='y_pred')(dense)
        # ctc loss
        ctc = keras.layers.Lambda(ctc_lambda_func, output_shape=[1], name='ctc')(
            [dense, input_y, input_x_widths, input_y_widths]
        )
        # init keras model
        super().__init__(inputs=[input_x, input_x_widths, input_y, input_y_widths], outputs=[y_pred, ctc])
        # ctc decoder
        top_k_decoded, _ = keras.backend.ctc_decode(y_pred, input_x_widths)
        self.decoder = keras.backend.function([input_x, input_x_widths], [top_k_decoded[0]])
        # decoded_sequences = self.decoder([test_input_data, test_input_lengths])
My use of ctc_decode comes from another post: Keras using Lambda layers error with K.ctc_decode
I get an error :
ValueError: Shape must be rank 1 but is rank 2 for 'CTCGreedyDecoder' (op: 'CTCGreedyDecoder') with input shapes: [?,?,7], [?,1].
I guess I have to squeeze my input_x_widths, but Keras does not seem to have such a function (it always outputs something like (batch_size, 1)).
Indeed, the function is expecting a 1D tensor, and you've got a 2D one.
Keras does have the keras.backend.squeeze(x, axis=-1) function.
You can also use keras.backend.reshape(x, (-1,)).
If you need to go back to the old shape after the operation, you can use either:
keras.backend.expand_dims(x)
keras.backend.reshape(x, (-1, 1))
Complete fix:
# ctc decoder
flattened_input_x_width = keras.backend.reshape(input_x_widths, (-1,))
top_k_decoded, _ = keras.backend.ctc_decode(y_pred, flattened_input_x_width)
self.decoder = keras.backend.function([input_x, flattened_input_x_width], [top_k_decoded[0]])
# decoded_sequences = self.decoder([test_input_data, test_input_lengths])
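Usage is then essentially unchanged, except the widths are fed as a flat array (a sketch with assumed test arrays):
# hypothetical: test_input_data (batch, width, 48, 1), test_input_lengths (batch, 1)
model = ModelOcropy(alphabet)
decoded_sequences = model.decoder([test_input_data, test_input_lengths.reshape(-1)])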
I was thinking of re-using the lower part of tf.contrib.keras.applications.ResNet50 and porting its output to my layers. I did:
tf.contrib.keras.backend.set_learning_phase(True)

tf_dataset = tf.contrib.data.Dataset.from_tensor_slices(\
        np.arange(seq_index.size('velodyne')))\
    .shuffle(1000)\
    .repeat()\
    .batch(10)\
    .map(partial(ing_dataset_map, seq_index))
iterator = tf_dataset.make_initializable_iterator()
next_elements = iterator.get_next()

def model(input_maps):
    input_maps = tf.reshape(input_maps, shape=[-1, 200, 200, 3])
    resnet = tf.contrib.keras.applications.ResNet50(
        include_top=False, weights=None,
        input_shape=(200, 200, 3), pooling=None)
    net = resnet.apply(input_maps)
    temp = tf.get_default_graph().get_tensor_by_name('activation_40/Relu:0')
    net = tf.layers.conv2d(inputs=temp,
        filters=2, kernel_size=[1, 1], padding='same',
        activation=tf.nn.relu)
    return net

m = model(next_elements['input_maps'])

with tf.Session() as sess:
    sess.run(iterator.initializer)
    sess.run(tf.global_variables_initializer())
    ret = sess.run(m)
Then TensorFlow reports:
You must feed a value for placeholder tensor 'input_1' with dtype float and shape [?,200,200,3]
If I directly use the output of the whole resnet.apply(input_maps), there are no errors. So I was just wondering how this could be reworked? Thank you.
I found the answer myself: you should make use of the Model functionality to create a usable sub-graph.
outputs = []
outputs.append(tf.get_default_graph().get_tensor_by_name('activation_25/Relu:0'))
outputs.append(tf.get_default_graph().get_tensor_by_name('activation_31/Relu:0'))
inputs = resnet.input
sub_resnet = tf.contrib.keras.models.Model(inputs, outputs)
low_branch, high_branch = sub_resnet.apply(input_maps)
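Putting it together with the earlier model() function (a sketch; sub_resnet is built as above, and the activation layer names depend on the Keras version):
def model(input_maps):
    input_maps = tf.reshape(input_maps, shape=[-1, 200, 200, 3])
    # calling the Keras Model on a tensor rewires a copy of its graph onto
    # input_maps, so the orphaned 'input_1' placeholder no longer needs feeding
    low_branch, high_branch = sub_resnet(input_maps)
    net = tf.layers.conv2d(inputs=high_branch, filters=2, kernel_size=[1, 1],
                           padding='same', activation=tf.nn.relu)
    return net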