I have a network with two inputs: x_tr2 has shape (120k, 4, 3, 3, 1) and x_tr1 has shape (120k, 120, 140, 3). For each entry in x_tr1 there is corresponding temporal information stacked in the shape (4, 3, 3, 1) in x_tr2, which is passed through an LSTM branch whenever a new entry of x_tr1 enters the model. I tried using a TimeDistributed layer for this, but it moves with a time step of 1; I want something that moves with a time step of 4. Basically, I want x_tr2[0], x_tr2[1], x_tr2[2], x_tr2[3] in the time-distributed stack whenever x_tr1[0] enters model_vgg19, and similarly x_tr2[4], x_tr2[5], x_tr2[6], x_tr2[7] whenever x_tr1[1] enters model_vgg19. Here's my code:
import tensorflow as tf
import keras
from keras.applications.inception_v3 import InceptionV3
from keras.layers import (Input, TimeDistributed, Convolution2D, Flatten,
                          BatchNormalization, Bidirectional, LSTM,
                          GlobalAveragePooling2D, Dropout, Dense)
from keras.models import Model

# temporal branch
temp_inp = Input(shape=(4, 3, 3, 1))
lstm1 = TimeDistributed(Convolution2D(32, (3, 3), strides=(1, 1), activation='relu'))(temp_inp)
lstm2 = TimeDistributed(Flatten())(lstm1)
lstm3 = TimeDistributed(BatchNormalization())(lstm2)
decoder = Bidirectional(LSTM(25, return_sequences=False))(lstm3)

# image branch (named model_vgg19, but it is actually InceptionV3)
img_inp = Input(shape=(120, 144, 3))
model_vgg19 = InceptionV3(weights='imagenet', include_top=False)(img_inp)
pool = GlobalAveragePooling2D()(model_vgg19)
b = Dropout(0.2)(pool)
c = Dense(10, activation='relu')(b)
cnn_out = Dense(10, activation='relu')(c)

# merge the two branches and classify
merged = keras.layers.concatenate([cnn_out, decoder], axis=1)
layer2 = Dense(1, activation='sigmoid')(merged)
merged_model = Model(inputs=[img_inp, temp_inp], outputs=layer2)
merged_model.summary()
merged_model.compile(loss='binary_crossentropy', optimizer='SGD',
                     metrics=['accuracy', tf.keras.metrics.AUC()])
training = merged_model.fit([x_tr1, x_tr2], y_tr,
                            validation_data=([x_te1, x_te2], y_te),
                            epochs=10, shuffle=False, batch_size=64)
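For what it's worth, if the temporal frames are originally collected as one flat stream (an assumption on my part; the x_tr2 above is already stacked), the pairing described above is just a numpy reshape away, as in this minimal sketch:
import numpy as np

x_tr2_flat = np.zeros((480000, 3, 3, 1), dtype='float32')  # hypothetical flat stream of frames
x_tr2 = x_tr2_flat.reshape(-1, 4, 3, 3, 1)                 # -> (120000, 4, 3, 3, 1)
# x_tr2[i] now holds frames 4i..4i+3, the window that enters the
# TimeDistributed stack together with x_tr1[i]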
I'm building a neural network using Keras and I'm a little lost on the LSTM layer input shape. Below is an image of the relevant part.
Both towers are similar, with the only difference that the left one accepts sequences of any length while the right one only accepts sequences of length 5. This results in their LSTM layers receiving an ambiguous sequence length and a sequence length of 4 respectively, both with 8 features per timestep. I'd thus expect both LSTM layers to have an input_shape of (1, 8).
My confusion now comes from the fact that both LSTM layers will accept any input shape without a problem, which is why I think this might not work the way I think it does. I'd expect the right LSTM layer to require an input shape whose first dimension is 1, 2 or 4, as only these sizes can divide the input sequence of 4. Further, I'd expect both to require the second dimension to always be 8.
Could someone explain why the LSTM layers can accept any input shape, and whether they process the sequences correctly with input_shape=(1, 8)? Below is the relevant code.
# Tower 1
inp_sentence1 = Input(shape=(None, 300, 1))
conv11 = Conv2D(32, (2, 300))(inp_sentence1)
reshape11 = K.squeeze(conv11, 2)
maxpl11 = MaxPooling1D(4, data_format='channels_first')(reshape11)
lstm11 = LSTM(units=6, input_shape=(1,8))(maxpl11)
# Tower 2
inp_sentence2 = Input(shape=(5, 300, 1))
conv21 = Conv2D(32, (2, 300))(inp_sentence2)
reshape21 = Reshape((4,32))(conv21)
maxpl21 = MaxPooling1D(4, data_format='channels_first')(reshape21)
lstm21 = LSTM(units=6, input_shape=(1,8))(maxpl21)
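As a minimal check of my own (not part of the original towers): in the functional API the input_shape argument on a non-first layer is silently ignored, and the LSTM builds its weights from whatever tensor it receives, which only has to be rank 3, i.e. (batch, timesteps, features):
from keras import backend as K
from keras.layers import Input, LSTM

inp = Input(shape=(None, 8))            # any number of timesteps, 8 features each
out = LSTM(6, input_shape=(1, 8))(inp)  # the (1, 8) hint has no effect here
print(K.int_shape(out))                 # (None, 6): one output vector per sequence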
EDIT: Short reproduction of the problem on dummy data:
import random
import numpy
from keras import backend as K
from keras.layers import Input, Conv2D, Reshape, MaxPooling1D, LSTM, Subtract, Dense
from keras.models import Model
# Tower 1
inp_sentence1 = Input(shape=(None, 300, 1))
conv11 = Conv2D(32, (2, 300))(inp_sentence1)
reshape11 = K.squeeze(conv11, 2)
maxpl11 = MaxPooling1D(4, data_format='channels_first')(reshape11)
lstm11 = LSTM(units=6, input_shape=(1,8))(maxpl11)
# Tower 2
inp_sentence2 = Input(shape=(5, 300, 1))
conv21 = Conv2D(32, (2, 300))(inp_sentence2)
reshape21 = Reshape((4,32))(conv21)
maxpl21 = MaxPooling1D(4, data_format='channels_first')(reshape21)
lstm21 = LSTM(units=6, input_shape=(1,8))(maxpl21)
# Combine towers
substract = Subtract()([lstm11, lstm21])
dense = Dense(16, activation='relu')(substract)
final = Dense(1, activation='sigmoid')(dense)
# Build model
model = Model([inp_sentence1, inp_sentence2], final)
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
# Create data
random_length = random.randint(2, 10)
x1 = numpy.random.random((100, random_length, 300))
x2 = numpy.random.random((100, 5, 300))
y = numpy.random.randint(2, size=100)
# Train and predict on data
model.fit([x1, x2], y, epochs=10, batch_size=5)
prediction = model.predict([x1, x2])
prediction = [round(x) for [x] in prediction]
classification = prediction == y
print("accuracy:", sum(classification)/len(prediction))
I am trying to create a model with a normalized cross-correlation custom layer, with the code taken from here:
from keras import backend as K
from keras.layers import Conv2D, MaxPooling2D, Dense, Input, Flatten
from keras.models import Model, Sequential
from keras.engine import InputSpec, Layer
from keras import regularizers
from keras.optimizers import SGD, Adam
from keras.utils.conv_utils import conv_output_length
from keras import activations
import numpy as np
class Normalized_Correlation_Layer(Layer):
    # a layer class inherited from keras.engine.Layer

    def __init__(self, patch_size=(5, 5),
                 dim_ordering='tf',
                 border_mode='same',
                 stride=(1, 1),
                 activation=None,
                 **kwargs):
        if border_mode != 'same':
            raise ValueError('Invalid border mode for Correlation Layer '
                             '(only "same" is supported as of now):', border_mode)
        self.kernel_size = patch_size
        self.subsample = stride
        self.dim_ordering = dim_ordering
        self.border_mode = border_mode
        self.activation = activations.get(activation)
        super(Normalized_Correlation_Layer, self).__init__(**kwargs)

    def compute_output_shape(self, input_shape):
        return (input_shape[0][0], input_shape[0][1], input_shape[0][2],
                self.kernel_size[0] * input_shape[0][2] * input_shape[0][-1])

    def get_config(self):
        config = {'patch_size': self.kernel_size,
                  'activation': self.activation.__name__,
                  'border_mode': self.border_mode,
                  'stride': self.subsample,
                  'dim_ordering': self.dim_ordering}
        base_config = super(Normalized_Correlation_Layer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def call(self, x, mask=None):
        input_1, input_2 = x
        stride_row, stride_col = self.subsample
        inp_shape = input_1._keras_shape
        output_shape = self.compute_output_shape([inp_shape, inp_shape])
        padding_row = (int(self.kernel_size[0] / 2), int(self.kernel_size[0] / 2))
        padding_col = (int(self.kernel_size[1] / 2), int(self.kernel_size[1] / 2))
        input_1 = K.spatial_2d_padding(input_1, padding=(padding_row, padding_col))
        input_2 = K.spatial_2d_padding(input_2, padding=((padding_row[0] * 2, padding_row[1] * 2), padding_col))
        output_row = output_shape[1]
        output_col = output_shape[2]
        output = []
        for k in range(inp_shape[-1]):
            xc_1 = []
            xc_2 = []
            # patches of feature map 2 that lie above the first row of feature map 1
            for i in range(padding_row[0]):
                for j in range(output_col):
                    xc_2.append(K.reshape(input_2[:, i:i + self.kernel_size[0], j:j + self.kernel_size[1], k],
                                          (-1, 1, self.kernel_size[0] * self.kernel_size[1])))
            for i in range(output_row):
                slice_row = slice(i, i + self.kernel_size[0])
                slice_row2 = slice(i + padding_row[0], i + self.kernel_size[0] + padding_row[0])
                for j in range(output_col):
                    slice_col = slice(j, j + self.kernel_size[1])
                    xc_2.append(K.reshape(input_2[:, slice_row2, slice_col, k],
                                          (-1, 1, self.kernel_size[0] * self.kernel_size[1])))
                    xc_1.append(K.reshape(input_1[:, slice_row, slice_col, k],
                                          (-1, 1, self.kernel_size[0] * self.kernel_size[1])))
            # patches of feature map 2 that lie below the last row of feature map 1
            for i in range(output_row, output_row + padding_row[1]):
                for j in range(output_col):
                    xc_2.append(K.reshape(input_2[:, i:i + self.kernel_size[0], j:j + self.kernel_size[1], k],
                                          (-1, 1, self.kernel_size[0] * self.kernel_size[1])))
            # normalize both patch stacks
            xc_1_aggregate = K.concatenate(xc_1, axis=1)
            xc_1_mean = K.mean(xc_1_aggregate, axis=-1, keepdims=True)
            xc_1_std = K.std(xc_1_aggregate, axis=-1, keepdims=True)
            xc_1_aggregate = (xc_1_aggregate - xc_1_mean) / xc_1_std
            xc_2_aggregate = K.concatenate(xc_2, axis=1)
            xc_2_mean = K.mean(xc_2_aggregate, axis=-1, keepdims=True)
            xc_2_std = K.std(xc_2_aggregate, axis=-1, keepdims=True)
            xc_2_aggregate = (xc_2_aggregate - xc_2_mean) / xc_2_std
            xc_1_aggregate = K.permute_dimensions(xc_1_aggregate, (0, 2, 1))
            block = []
            len_xc_1 = len(xc_1)
            for i in range(len_xc_1):
                # sl1 selects the patches of feature map 2 to be correlated
                # with the given patch of feature map 1
                sl1 = slice(int(i / inp_shape[2]) * inp_shape[2],
                            int(i / inp_shape[2]) * inp_shape[2] + inp_shape[2] * self.kernel_size[0])
                block.append(K.reshape(K.batch_dot(xc_2_aggregate[:, sl1, :],
                                                   xc_1_aggregate[:, :, i]),
                                       (-1, 1, 1, inp_shape[2] * self.kernel_size[0])))
            block = K.concatenate(block, axis=1)
            block = K.reshape(block, (-1, output_row, output_col, inp_shape[2] * self.kernel_size[0]))
            output.append(block)
        output = self.activation(output)
        print(output)
        return output
My model is a combination of cross-correlation and Conv2D layers:
import keras

dt = 'float32'

def create_model():
    ip = keras.layers.Input((50, 50, 1))
    ncx1_1 = Normalized_Correlation_Layer(patch_size=(1, 1))([ip, ip])
    ncn1_1 = keras.layers.Conv2D(64, (1, 1), activation='relu', dtype=dt)(ip)
    ncn2_1 = keras.layers.Conv2D(64, (1, 1), activation='relu', dtype=dt)(ncx1_1)
    ncx2_1 = Normalized_Correlation_Layer(patch_size=(1, 1), dtype=dt)([ncn1_1, ncn2_1])
    # ncx2_1 = keras.layers.Reshape((50, 50, 3200))(ncx2_1)
    # Problem occurs here
    ncn3 = keras.layers.Conv2D(filters=64, kernel_size=(1, 1), activation='relu', dtype=dt)(ncx2_1)
    ncn4 = keras.layers.Conv2D(12, (1, 1), activation='sigmoid', dtype=dt)(ncn3)
    model = keras.models.Model(ip, ncn4)
    return model
The model is created successfully up to the last cross-correlation layer, but I get a problem with the ncn3 layer:
ValueError: number of input channels does not match corresponding dimension of filter, 50 != 3200
While creating the model, the output shape of the ncx2_1 layer is printed as (?, 50, 50, 50), both when I print ncx2_1.shape and in the outputs returned from the call function of the layer class ([<tf.Tensor 'normalized__correlation__layer_4/Reshape_10000:0' shape=(?, 50, 50, 50) dtype=float32>]).
But the model summary shows it as (?, 50, 50, 3200) when I create the model up to that layer only, i.e. model = keras.models.Model(ip, ncx2_1).
When I reshape the layer's output using ncx2_1 = keras.layers.Reshape((50, 50, 3200))(ncx2_1), I can create the model successfully, but when I try to fit data on it, I get:
InvalidArgumentError: Input to reshape is a tensor with 6250000 values, but the requested shape has 400000000
[[node reshape_1/Reshape (defined at /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:1781) ]]
[[node loss/mul (defined at /usr/local/lib/python3.6/dist-packages/keras/engine/training.py:865) ]]
Here my batch size is 50, so for a layer with (B, H, W, C) inputs of (50, 50, 50, 50) the size should be 6250000, but for (50, 50, 50, 3200) it should be 400000000, which means that the output of the cross-correlation layer really has 50 channels.
I am either interpreting this wrong, or I have made a mistake somewhere, and I would like to know which.
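Spelling out the arithmetic behind that conclusion:
# element counts from the error message, at batch size 50
50 * 50 * 50 * 50    # = 6250000:   a (50, 50, 50, 50) tensor
50 * 50 * 50 * 3200  # = 400000000: what Reshape((50, 50, 3200)) expects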
I am using Keras 2.1.2 with TensorFlow 1.13.1 (the versions the custom layer was written for; I was getting other problems with the latest versions).
I am also using a custom generator, in case that is relevant, and I call fit via md.fit_generator(train_gen, verbose=1). I can add any other necessary details.
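For context, here is a toy illustration of my own (not from the linked code) of how such a mismatch can arise: Keras trusts compute_output_shape() for the static graph and for model.summary(), while the tensor built in call() determines what actually flows at run time, so when the two disagree, downstream layers are built against the wrong channel count:
from keras.engine import Layer

class ShapeMismatchDemo(Layer):
    # claims 3200 output channels to the static graph ...
    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[1], input_shape[2], 3200)

    # ... but the tensor it actually builds keeps the input's channels
    def call(self, x, mask=None):
        return x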
I was thinking of re-using the lower part of tf.contrib.keras.applications.ResNet50 and porting its output to my own layers. I did:
from functools import partial

import numpy as np
import tensorflow as tf

tf.contrib.keras.backend.set_learning_phase(True)

tf_dataset = tf.contrib.data.Dataset.from_tensor_slices(\
        np.arange(seq_index.size('velodyne')))\
    .shuffle(1000)\
    .repeat()\
    .batch(10)\
    .map(partial(ing_dataset_map, seq_index))
iterator = tf_dataset.make_initializable_iterator()
next_elements = iterator.get_next()

def model(input_maps):
    input_maps = tf.reshape(input_maps, shape=[-1, 200, 200, 3])
    resnet = tf.contrib.keras.applications.ResNet50(
        include_top=False, weights=None,
        input_shape=(200, 200, 3), pooling=None)
    net = resnet.apply(input_maps)
    temp = tf.get_default_graph().get_tensor_by_name('activation_40/Relu:0')
    net = tf.layers.conv2d(inputs=temp,
                           filters=2, kernel_size=[1, 1], padding='same',
                           activation=tf.nn.relu)
    return net

m = model(next_elements['input_maps'])

with tf.Session() as sess:
    sess.run(iterator.initializer)
    sess.run(tf.global_variables_initializer())
    ret = sess.run(m)
Then TensorFlow reports:
You must feed a value for placeholder tensor 'input_1' with dtype float and shape [?,200,200,3]
If I directly use the output of the whole resnet.apply(input_maps), there are no errors. So I was just wondering how this could be reformed? Thank you.
I found the answer myself: one should make use of the Model functionality to create a usable sub-graph.
outputs = []
outputs.append(tf.get_default_graph().get_tensor_by_name('activation_25/Relu:0'))
outputs.append(tf.get_default_graph().get_tensor_by_name('activation_31/Relu:0'))
inputs = resnet.input
sub_resnet = tf.contrib.keras.models.Model(inputs, outputs)
low_branch, high_branch = sub_resnet.apply(input_maps)
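For reference, the same sub-model can also be built without fishing tensors out of the default graph, via get_layer — a sketch under the names used above (the exact layer names depend on the graph):
resnet = tf.contrib.keras.applications.ResNet50(
    include_top=False, weights=None,
    input_shape=(200, 200, 3), pooling=None)
outputs = [resnet.get_layer('activation_25').output,
           resnet.get_layer('activation_31').output]
sub_resnet = tf.contrib.keras.models.Model(resnet.input, outputs)
low_branch, high_branch = sub_resnet(input_maps)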
I am currently having problems resolving this issue.
I have a network into which I feed a numpy.ndarray of shape
(batch_size, 40, 45, 3)
and I want to do a 1D convolution on each row, then append each result to a list.
This requires that I can somehow extract the rows of my input, but how do I access the input data inside the model definition?
This is what I currently have:
import numpy as np
import tensorflow as tf
from keras import metrics
from keras.models import Sequential
from keras.layers import Lambda, Conv1D

def row_convolution(input):
    filter_size = 8
    for units in xrange(splits):
        extract = input[units:units + filter_size, :, :]
        for row_of_extract in extract:
            row_of_extract = np.expand_dims(row_of_extract, axis=0)
            temp_list.append(Conv1D(filters=1, kernel_size=1, activation='relu', name='conv')(tf.pack(row_of_extract)))
        print len(temp_list)
        sum_temp_list.append(sum(temp_list))
    conv_feature_map.append(sum_temp_list)
    return np.array(conv_feature_map)

def output_of_row_convolution(input_shape):
    return (1, input_shape)

def fws():
    # input shape:  (batch_size, 40, 45, 3)
    # output shape: (1, 15, 50)
    # number of units in conv_feature_map = splits
    filter_size = 8
    temp_list = []
    sum_temp_list = []
    conv_feature_map = []
    model = Sequential()
    # convolution
    model.add(Lambda(row_convolution, output_shape=output_of_row_convolution, input_shape=(40, 45, 3)))
    model.compile(loss="categorical_crossentropy", optimizer="SGD", metrics=[metrics.categorical_accuracy])
    # pooling
    hist_current = model.fit_generator(train_generator(batch_size),
                                       steps_per_epoch=10,
                                       epochs=100000,
                                       verbose=1,
                                       validation_data=test_generator(),
                                       validation_steps=1,
                                       pickle_safe=True,
                                       workers=4)

fws()
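The underlying constraint, shown in a minimal sketch of my own (under the stated input shape): inside the model definition, input is a symbolic tensor rather than an ndarray, so row extraction has to happen with graph operations such as a Lambda slice, not with numpy calls or Python loops over array contents:
from keras.layers import Input, Lambda, Conv1D, concatenate
from keras.models import Model

inp = Input(shape=(40, 45, 3))                       # (rows, cols, channels)
outputs = []
for r in range(40):                                  # one 1D convolution per row
    row = Lambda(lambda t, r=r: t[:, r, :, :])(inp)  # -> (batch, 45, 3)
    outputs.append(Conv1D(filters=1, kernel_size=1, activation='relu')(row))
merged = concatenate(outputs, axis=-1)               # -> (batch, 45, 40)
model = Model(inp, merged)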
I have trained a model using Keras with TensorFlow as the backend, as follows:
from keras.models import Sequential
from keras.layers import InputLayer, Convolution1D, MaxPooling1D, Flatten, Dense

activation = 'relu'
initializer = 'he_normal'
n_hidden = [256, 128]
n_filters = [32]
input_shape = (batch_size, 7213, 1)

model = Sequential([
    InputLayer(batch_input_shape=input_shape),
    Convolution1D(nb_filter=n_filters[0], filter_length=8, activation=activation,
                  border_mode='same', init=initializer, input_shape=input_shape),
    MaxPooling1D(pool_length=4),
    Flatten(),
    Dense(output_dim=n_hidden[0], activation=activation, init=initializer),
    Dense(output_dim=n_hidden[1], activation=activation, init=initializer),
    Dense(output_dim=3, input_dim=n_hidden[1], activation='linear'),
])
I need to build a Theano function that replicates model.predict() with my saved weights, in order to return the Jacobian matrix of the outputs w.r.t. the inputs.
The following numpy code gives the same results as model.predict():
import numpy as np

pool_length = 4
x_ = test_X_data.reshape(batch_size, 7213)
weights_ = model.get_weights()  # obtain model weights

# reshape CNN weights and bias weights
weights_[0] = np.reshape(weights_[0], (weights_[0].shape[0], weights_[0].shape[3]))
weights_[1] = np.reshape(weights_[1], (1, weights_[1].shape[0]))
weights_[3] = np.reshape(weights_[3], (1, weights_[3].shape[0]))
weights_[5] = np.reshape(weights_[5], (1, weights_[5].shape[0]))
weights_[7] = np.reshape(weights_[7], (1, weights_[7].shape[0]))

# pad left and right sides of input
x_padded = np.pad(x_, ((0, 0), (3, 4)), mode='constant')

# compute Conv1D layer with bias weights
prediction = np.zeros((x_.shape[0], x_.shape[1], weights_[0].shape[1]))
for i in range(x_.shape[1]):
    prediction[:, i] = np.dot(x_padded[:, i:i+8], weights_[0]) + weights_[1]

# ReLU activation
prediction[prediction < 0] = 0

# max pooling layer
pred_temp = np.zeros((prediction.shape[0], prediction.shape[1] // pool_length, prediction.shape[2]))
for i in range(prediction.shape[2]):
    for j in range(prediction.shape[1] // pool_length):
        pred_temp[:, j, i] = np.max(prediction[:, j*4:(j+1)*4, i], axis=1)
prediction = pred_temp.reshape(pred_temp.shape[0], pred_temp.shape[1] * pred_temp.shape[2])

# dense layers: stack the bias under the kernel and append a column of
# ones to the activations, so each layer is a single matrix product
weights = np.vstack([weights_[2], weights_[3]])
prediction = np.hstack([prediction, np.zeros(prediction.shape[0]).reshape(prediction.shape[0], 1)])
prediction[:, -1] = 1
prediction = np.dot(prediction, weights)
prediction[prediction < 0] = 0

weights = np.vstack([weights_[4], weights_[5]])
prediction = np.hstack([prediction, np.zeros(prediction.shape[0]).reshape(prediction.shape[0], 1)])
prediction[:, -1] = 1
prediction = np.dot(prediction, weights)
prediction[prediction < 0] = 0

weights = np.vstack([weights_[6], weights_[7]])
prediction = np.hstack([prediction, np.zeros(prediction.shape[0]).reshape(prediction.shape[0], 1)])
prediction[:, -1] = 1
prediction = np.dot(prediction, weights)
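As a quick consistency check (assuming the arrays above are still in scope), the hand-rolled forward pass should agree with the network up to float tolerance:
assert np.allclose(prediction, model.predict(test_X_data), atol=1e-4)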
Below is my attempt at turning this into a Theano function to compute the Jacobian, but I believe the for loops are making it too slow to compile.
import theano
import theano.tensor as T

theano.config.optimizer = 'fast_compile'
theano.config.exception_verbosity = 'high'

weights_in_model = theano.typed_list.TypedListType(theano.tensor.dmatrix)()
x = T.matrix('x')

def pred_jac(x, weights_in_model):
    pool_length = 4
    # pad left and right sides of the input, as np.pad did above
    x = T.concatenate((T.zeros((T.shape(x)[0], 3)), x, T.zeros((T.shape(x)[0], 4))), axis=1)
    # apply convolution weights to the input
    prediction = []
    for i in range(7213):
        prediction.append(T.dot(x[:, i:i+8], weights_in_model[0]) + weights_in_model[1])
    prediction = T.as_tensor_variable(prediction)
    prediction = T.clip(prediction, 0, 9999.)  # ReLU activation
    prediction = prediction.dimshuffle(1, 0, 2)  # reformat to the proper dimension order
    # max pooling layer
    pred_temp = []
    for i in range(32):
        pred_temp_b = []
        for j in range(1803):
            pred_temp_b.append(T.max(prediction[:, j*pool_length:(j+1)*pool_length, i], axis=1))
        pred_temp.append(T.as_tensor_variable(pred_temp_b))
    pred_temp = T.as_tensor_variable(pred_temp)
    pred_temp = pred_temp.dimshuffle(1, 2, 0)
    # dense layers
    prediction = T.reshape(pred_temp, (T.shape(pred_temp)[0], T.shape(pred_temp)[1] * T.shape(pred_temp)[2]))
    weights = T.concatenate((weights_in_model[2], weights_in_model[3]), axis=0)
    prediction = T.concatenate((prediction, T.ones((T.shape(prediction)[0], 1))), axis=1)
    prediction = T.dot(prediction, weights)
    prediction = T.clip(prediction, 0, 9999.)
    weights = T.concatenate((weights_in_model[4], weights_in_model[5]), axis=0)
    prediction = T.concatenate((prediction, T.ones((T.shape(prediction)[0], 1))), axis=1)
    prediction = denormalize(T.dot(prediction, weights))
    prediction = T.flatten(prediction)
    return prediction

# the Jacobian holds the first-order partial derivatives of the outputs w.r.t. the inputs
jac = theano.gradient.jacobian(pred_jac(x, weights_in_model), wrt=x)
compute_jac = theano.function([x, weights_in_model], [jac], allow_input_downcast=True)
Any suggestions on how to improve this function and/or speed up its compile and computation times?
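One direction, as a sketch of my own (not tested against the model above): the Python-level loops unroll into one graph node per iteration — 7213 for the convolution alone — which is what blows up compile time, whereas theano.scan keeps the graph at a single node regardless of sequence length. The convolution loop could become:
import theano
import theano.tensor as T

def conv_step(i, x_padded, W, b):
    # one convolution position: an 8-wide window dotted with the kernel
    return T.dot(x_padded[:, i:i + 8], W) + b

prediction, _ = theano.scan(conv_step,
                            sequences=T.arange(7213),
                            non_sequences=[x, weights_in_model[0], weights_in_model[1]])
# result is (positions, batch, filters); dimshuffle(1, 0, 2) as before.
# The max-pooling double loop can likewise become a reshape plus max:
#   pred_temp = prediction[:, :7212, :].reshape((-1, 1803, 4, 32)).max(axis=2)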