Load/test a previously trained and saved neural network in Python

I define my neural network:

class Classifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)

    def forward(self, x):
        # make sure the input tensor is flattened
        x = x.view(x.shape[0], -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.log_softmax(self.fc4(x), dim=1)
        return x

model = Classifier()
I train the neural network.

I save the trained neural net:

checkpoint = {'input_size': 784,
              'output_size': 10,
              'hidden_layers': [256, 128, 64],
              'state_dict': model.state_dict()}
torch.save(checkpoint, 'checkpoint.pth')
state_dict = torch.load('checkpoint.pth')
When I try to load the saved neural network, I get an error:

def load_checkpoint(filepath):
    checkpoint = torch.load(filepath)
    # I believe the error is in the line directly below
    model_b = model(checkpoint['input_size'], checkpoint['output_size'], checkpoint['hidden_layers'])
    model_b.load_state_dict(checkpoint['state_dict'])
    return model_b

model_b = load_checkpoint('checkpoint.pth')
I get the following error:
TypeError: forward() takes 2 positional arguments but 4 were given

I think you're mixing up a few points:

Your class's __init__ takes no arguments, so your neural network has a fixed architecture; you can't use the other keys of your checkpoint dict to create a model with the same parameters.

nn.Module has a __call__ method that redirects to forward. It runs whenever you write object(something), where something becomes the arguments of forward. In load_checkpoint you ran model_b = model(checkpoint['input_size'], checkpoint['output_size'], checkpoint['hidden_layers']), i.e. you tried to do a forward pass using elements of your dictionary as inputs, hence the error (the 4 positional arguments are self, checkpoint['input_size'], checkpoint['output_size'] and checkpoint['hidden_layers']).

To fix the loading problem, remove that line (or replace it with model_b = Classifier(), so that model_b exists) and it should work.

If you wish to create a model using the checkpoint's input_size, output_size and hidden_layers, you should pass these values to the constructor: model = Classifier(checkpoint['input_size'], checkpoint['output_size'], checkpoint['hidden_layers']). Your code will need a few changes in order for this to work; a sketch of what that could look like is below.
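For illustration, here is a minimal sketch of such a parameterized Classifier and load_checkpoint. It is only a sketch, not the question's exact code, and load_state_dict only succeeds if the parameter names in the checkpoint match the new module structure, so it assumes the checkpoint was saved from a model built the same way:

import torch
from torch import nn
import torch.nn.functional as F

class Classifier(nn.Module):
    def __init__(self, input_size, output_size, hidden_layers):
        super().__init__()
        # e.g. input_size=784, hidden_layers=[256, 128, 64], output_size=10
        sizes = [input_size] + list(hidden_layers) + [output_size]
        self.fcs = nn.ModuleList(
            [nn.Linear(sizes[i], sizes[i + 1]) for i in range(len(sizes) - 1)]
        )

    def forward(self, x):
        x = x.view(x.shape[0], -1)            # flatten the input
        for fc in self.fcs[:-1]:
            x = F.relu(fc(x))
        return F.log_softmax(self.fcs[-1](x), dim=1)

def load_checkpoint(filepath):
    checkpoint = torch.load(filepath)
    model_b = Classifier(checkpoint['input_size'],
                         checkpoint['output_size'],
                         checkpoint['hidden_layers'])
    model_b.load_state_dict(checkpoint['state_dict'])
    return model_b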

Related

Initializing model parameters in pytorch manually

I am creating a separate class to initialize the model and I add the layers to a list, but those layers are not added to the model's parameters. Please tell me how to add them to the model's parameters().
class Mnist_Net(nn.Module):
    def __init__(self, input_dim, output_dim, hidden_layers=2, neurons=128):
        super().__init__()
        layers = []
        for i in range(hidden_layers):
            if len(layers) == 0:
                layers.append(nn.Linear(input_dim, neurons))
            if i == hidden_layers-1:
                layers.append(nn.Linear(layers[-2].weight.shape[0], output_dim))
            layers.append(nn.Linear(layers[i-1].weight.shape[0], neurons))
        self.layers = layers
When I print the model's parameters,

model = Mnist_Net(28*28, 10, neurons=56)
for t in model.parameters():
    print(t)

nothing is printed. But when I add a layer as a direct attribute in the class, like

self.layer1 = nn.Linear(input_dim, neurons)

that one layer shows up in the parameters. Please tell me how I can get all the layers in self.layers into model.parameters().
To be registered in the parent module, your submodules should be nn.Modules themselves. In your case, you should wrap layers with nn.ModuleList:
self.layers = nn.ModuleList(layers)
Then, your layers will be registered:
>>> model = Mnist_Net(28*28,10, neurons=56)
>>> for t in model.parameters():
...     print(t.shape)
torch.Size([56, 784])
torch.Size([56])
torch.Size([56, 56])
torch.Size([56])
torch.Size([10, 56])
torch.Size([10])
torch.Size([56, 56])
torch.Size([56])
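Putting the question's code and the fix together, the __init__ would look like this (only the last line changes):

class Mnist_Net(nn.Module):
    def __init__(self, input_dim, output_dim, hidden_layers=2, neurons=128):
        super().__init__()
        layers = []
        for i in range(hidden_layers):
            if len(layers) == 0:
                layers.append(nn.Linear(input_dim, neurons))
            if i == hidden_layers-1:
                layers.append(nn.Linear(layers[-2].weight.shape[0], output_dim))
            layers.append(nn.Linear(layers[i-1].weight.shape[0], neurons))
        # wrapping the plain list in nn.ModuleList registers the submodules
        self.layers = nn.ModuleList(layers)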

Tensorflow: How to pass a state and an input to LSTMCell?

I'm trying to write my own recurrent network using LSTM cells and I'm having trouble passing the state to LSTMCell. In my FeedBack class (based on the TensorFlow tutorial) there are the following lines to initialize the final layers of the model:
class FeedBack(tf.keras.Model):
    def __init__(self, units, in_steps, out_steps, graph):
        ...
        self.lstm_cell = tf.keras.layers.LSTMCell(units)  # units = 128 in this example
        # Also wrap the LSTMCell in an RNN to simplify the `warmup` method.
        self.lstm_rnn = tf.keras.layers.RNN(self.lstm_cell, return_state=True)
        self.dense = tf.keras.layers.Dense(1)
I've also defined a warmup method inside which the first prediction is made like so:
...
# merged.shape = (None, 24, 34)
x, *state = self.lstm_rnn(merged)
# x.shape = (None, 128)
# [x.shape for x in state] = [TensorShape([None, 128]), TensorShape([None, 128])]
prediction = self.dense(x)
And finally, inside the call method, I'm trying to predict the values with:
...
# merged.shape = (None, 24, 34)
# state remains untouched
x, state = self.lstm_cell(merged, states=state)
prediction = self.dense(x)
But it results with the following error:
ValueError: Dimensions must be equal, but are 512 and 128 for '{{node feed_back/lstm_cell/mul}} = Mul[T=DT_FLOAT](feed_back/lstm_cell/Sigmoid_1, feed_back/rnn/while/Identity_5)' with input shapes: [?,6,512], [?,128].
Where does [?,6,512] come from? And how can I fix this?

How does the neural network definition in PyTorch use Python classes?

In order to understand how this code works, I have written a small reproducer. How does the self.hidden variable use the variable x in the forward method?
class Network(nn.Module):
    def __init__(self):
        super().__init__()
        # Inputs to hidden layer linear transformation
        self.hidden = nn.Linear(784, 256)
        # Output layer, 10 units - one for each digit
        self.output = nn.Linear(256, 10)
        # Define sigmoid activation and softmax output
        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        # Pass the input tensor through each of our operations
        x = self.hidden(x)
        x = self.sigmoid(x)
        x = self.output(x)
        x = self.softmax(x)
        return x
You misunderstood what self.hidden = nn.Linear(784, 256) does. You wrote that:
hidden is defined as a function
but this is not true. self.hidden is an object of the class nn.Linear. And when you call self.hidden(...), you are not passing arguments to forward directly; you are invoking __call__ (inherited from nn.Module), which in turn runs forward for you.
If you want more details on that, I have expanded on how it works in PyTorch: see this answer.
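As a small illustration (not part of the question's code), calling any nn.Module instance goes through __call__, which ends up running forward:

import torch
from torch import nn

layer = nn.Linear(784, 256)        # self.hidden is an object just like this one
x = torch.randn(64, 784)           # a batch of 64 flattened inputs

out1 = layer(x)                    # goes through nn.Module.__call__ ...
out2 = layer.forward(x)            # ... which ends up calling forward(x)

print(out1.shape)                  # torch.Size([64, 256])
print(torch.equal(out1, out2))     # True: the same computation was performed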

Why is listing model components in PyTorch not useful?

I am trying to create feed-forward neural networks with N layers. The idea is that I just pass a list with the number of neurons in each layer and a fully connected network gets built from it: for example, [2, 3, 2] means 2 inputs, one hidden layer with 3 neurons and 2 outputs, and [100, 1000, 1000, 2] means 100 inputs, two hidden layers with 1000 neurons each and 2 outputs.

For that I have written the following code:
class FeedforwardNeuralNetModel(nn.Module):
    def __init__(self, layers):
        super(FeedforwardNeuralNetModel, self).__init__()
        self.fc = []
        self.sigmoid = []
        self.activationValue = []
        self.layers = layers
        for i in range(len(layers)-1):
            self.fc.append(nn.Linear(layers[i], layers[i+1]))
            self.sigmoid.append(nn.Sigmoid())

    def forward(self, x):
        out = x
        for i in range(len(self.fc)):
            out = self.fc[i](out)
            out = self.sigmoid[i](out)
        return out
When I tried to use it, the model printed as if it were empty:

model = FeedforwardNeuralNetModel([3, 5, 10, 2])
print(model)
>> FeedforwardNeuralNetModel()
But when I used the following code
class FeedforwardNeuralNetModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(FeedforwardNeuralNetModel, self).__init__()
        # Linear function
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        # Non-linearity
        self.tanh = nn.Tanh()
        # Linear function (readout)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # Linear function
        out = self.fc1(x)
        # Non-linearity
        out = self.tanh(out)
        # Linear function (readout)
        out = self.fc2(out)
        return out
and printed this model, I found the following result:

print(model)
>> FeedforwardNeuralNetModel(
  (fc1): Linear(in_features=3, out_features=5, bias=True)
  (sigmoid): Sigmoid()
  (fc2): Linear(in_features=5, out_features=10, bias=True)
)
The only difference is that in my first code I am just creating plain Python lists. I just wanted to understand why listing model components like this is not useful in torch.
If you do print(FeedforwardNeuralNetModel([1,2,3])), it gives the following error:
AttributeError: 'FeedforwardNeuralNetModel' object has no attribute '_modules'
which basically means that the object is not able to recognize modules that you have declared.
Why does this happen?
Currently, the modules are declared in self.fc, which is a plain Python list, so torch has no way of knowing that it holds modules unless it does a deep search, which would be bad and inefficient.
How can we let torch know that self.fc is a list of modules?
By using nn.ModuleList (see the modified code below). ModuleList and ModuleDict behave like a Python list and dictionary respectively, but they tell torch that the list/dict contains nn modules, so those modules get registered.
# modified __init__ function
def __init__(self, layers):
    super().__init__()
    self.fc = nn.ModuleList()
    self.sigmoid = []
    self.activationValue = []
    self.layers = layers
    for i in range(len(layers)-1):
        self.fc.append(nn.Linear(layers[i], layers[i+1]))
        self.sigmoid.append(nn.Sigmoid())
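With that change, printing the model should now list the registered layers; for [3, 5, 10, 2] the output would look roughly like this:

model = FeedforwardNeuralNetModel([3, 5, 10, 2])
print(model)
# FeedforwardNeuralNetModel(
#   (fc): ModuleList(
#     (0): Linear(in_features=3, out_features=5, bias=True)
#     (1): Linear(in_features=5, out_features=10, bias=True)
#     (2): Linear(in_features=10, out_features=2, bias=True)
#   )
# )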

Find input that maximises output of a neural network using Keras and TensorFlow

I have used Keras and TensorFlow to classify the Fashion MNIST dataset following this tutorial.
It uses the AdamOptimizer to find the value for model parameters that minimize the loss function of the network. The input for the network is a 2-D tensor with shape [28, 28], and output is a 1-D tensor with shape [10] which is the result of a softmax function.
Once the network has been trained, I want to use the optimizer for another task: find an input that maximizes one of the elements of the output tensor. How can this be done? Is it possible to do so using Keras, or does one have to use a lower-level API?
Since the input is not unique for a given output, it would be even better if we could impose some constraints on the values the input can take.
The trained model has the following format:

model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(128, activation=tf.nn.relu),
    keras.layers.Dense(10, activation=tf.nn.softmax)
])
I feel you would want to backprop with respect to the input while freezing all the weights of your model. What you could do is:

Add a dense layer after the input layer with the same dimensions as the input and set it as trainable.
Freeze all the other layers of your model (except the one you added).
As an input, feed an identity matrix and train your model based on whatever output you desire.
This article and this post might be able to help you if you want to backprop based on the input instead. It's a bit like what you are aiming for but you can get the intuition.
It would be very similar to the way the filters of a convolutional network are visualized: we would do gradient ascent optimization in input space to maximize the response of a particular filter.
Here is how to do it: after training is finished, first we need to specify the output and define a loss function that we want to maximize:
from keras import backend as K
output_class = 0 # the index of the output class we want to maximize
output = model.layers[-1].output
loss = K.mean(output[:,output_class]) # get the average activation of our desired class over the batch
Next, we need to take the gradient of the loss we have defined above with respect to the input layer:
grads = K.gradients(loss, model.input)[0] # the output of `gradients` is a list, just take the first (and only) element
grads = K.l2_normalize(grads)  # normalize the gradients to help having a smooth optimization process
Next, we need to define a backend function that takes the initial input image and gives the values of loss and gradients as outputs, so that we can use it in the next step to implement the optimization process:
func = K.function([model.input], [loss, grads])
Finally, we implement the gradient ascent optimization process:
import numpy as np

input_img = np.random.random((1, 28, 28))  # define an initial random image
lr = 1.  # learning rate used for gradient updates
max_iter = 50  # number of gradient update iterations

for i in range(max_iter):
    loss_val, grads_val = func([input_img])
    input_img += grads_val * lr  # update the image based on gradients
Note that, after this process is finished, to display the image you may need to make sure that all the values in the image are in the range [0, 255] (or [0,1]).
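For reference, K.gradients and K.function belong to the old graph-mode Keras backend API; in TF 2.x the same gradient-ascent idea can be sketched with tf.GradientTape. This is only a sketch, assuming model is the trained network from the question:

import numpy as np
import tensorflow as tf

output_class = 0                      # index of the class to maximize
input_img = tf.Variable(np.random.random((1, 28, 28)), dtype=tf.float32)
lr = 1.0                              # step size for gradient ascent
max_iter = 50

for _ in range(max_iter):
    with tf.GradientTape() as tape:
        preds = model(input_img)                       # shape (1, 10)
        loss = tf.reduce_mean(preds[:, output_class])  # activation of the target class
    grads = tape.gradient(loss, input_img)             # d(loss) / d(input)
    grads = tf.math.l2_normalize(grads)                # keep the updates well-scaled
    input_img.assign_add(lr * grads)                   # gradient *ascent* step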
After the hints Saket Kumar Singh gave in his answer, I wrote the following that seems to solve the question.
I create two custom layers. Maybe Keras already offers some classes equivalent to them.

The first one is a trainable input:
class MyInputLayer(keras.layers.Layer):
    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(MyInputLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.kernel = self.add_weight(name='kernel',
                                      shape=self.output_dim,
                                      initializer='uniform',
                                      trainable=True)
        super(MyInputLayer, self).build(input_shape)

    def call(self, x):
        return self.kernel

    def compute_output_shape(self, input_shape):
        return self.output_dim
The second one gets the probability of the label of interest:
class MySelectionLayer(keras.layers.Layer):
    def __init__(self, position, **kwargs):
        self.position = position
        self.output_dim = 1
        super(MySelectionLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        super(MySelectionLayer, self).build(input_shape)

    def call(self, x):
        mask = np.array([False] * x.shape[-1])
        mask[self.position] = True
        return tf.boolean_mask(x, mask, axis=1)

    def compute_output_shape(self, input_shape):
        return self.output_dim
I used them in this way:
# Build the model
layer_flatten = keras.layers.Flatten(input_shape=(28, 28))
layerDense1 = keras.layers.Dense(128, activation=tf.nn.relu)
layerDense2 = keras.layers.Dense(10, activation=tf.nn.softmax)
model = keras.Sequential([
    layer_flatten,
    layerDense1,
    layerDense2
])

# Compile the model
model.compile(optimizer=tf.train.AdamOptimizer(),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
# ...

# Freeze the model
layerDense1.trainable = False
layerDense2.trainable = False

# Build another model
class_index = 7
layerInput = MyInputLayer((1, 784))
layerSelection = MySelectionLayer(class_index)
model_extended = keras.Sequential([
    layerInput,
    layerDense1,
    layerDense2,
    layerSelection
])

# Compile it
model_extended.compile(optimizer=tf.train.AdamOptimizer(),
                       loss='mean_absolute_error')

# Train it
dummyInput = np.ones((1, 1))
target = np.ones((1, 1))
model_extended.fit(dummyInput, target, epochs=300)

# Retrieve the weights of layerInput
layerInput.get_weights()[0]
Interesting. Maybe a solution would be to feed all your data to the network and for each sample save the output_layer after softmax.
This way, for 3 classes, where you want to find the best input for class 1, you are looking for outputs where the first component is high. For example: [1 0 0]
Indeed, the output represents the probability, or the confidence of the network, that the sample belongs to each of the classes.
Funny coincidence, I was just working on the same "problem". I'm interested in the direction of adversarial training etc. What I did was to insert a LocallyConnected2D layer after the input and then train with data that is all ones and has the class of interest as target.

As a model I use:
batch_size = 64
num_classes = 10
epochs = 20
input_shape = (28, 28, 1)

inp = tf.keras.layers.Input(shape=input_shape)
conv1 = tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu', kernel_initializer='he_normal')(inp)
pool1 = tf.keras.layers.MaxPool2D((2, 2))(conv1)
drop1 = tf.keras.layers.Dropout(0.20)(pool1)
flat = tf.keras.layers.Flatten()(drop1)
fc1 = tf.keras.layers.Dense(128, activation='relu')(flat)
norm1 = tf.keras.layers.BatchNormalization()(fc1)
dropfc1 = tf.keras.layers.Dropout(0.25)(norm1)
out = tf.keras.layers.Dense(num_classes, activation='softmax')(dropfc1)

model = tf.keras.models.Model(inputs=inp, outputs=out)
model.compile(loss=tf.keras.losses.categorical_crossentropy,
              optimizer=tf.keras.optimizers.RMSprop(),
              metrics=['accuracy'])
model.summary()
After training I insert the new layer:
def insert_intermediate_layer_in_keras(model, new_layer, before_layer_id):
    layers = [l for l in model.layers]
    if before_layer_id == 0:
        x = new_layer
    else:
        x = layers[0].output
    for i in range(1, len(layers)):
        if i == before_layer_id:
            x = new_layer(x)
            x = layers[i](x)
        else:
            x = layers[i](x)
    new_model = tf.keras.models.Model(inputs=layers[0].input, outputs=x)
    return new_model

def fix_model(model):
    for l in model.layers:
        l.trainable = False

fix_model(model)

new_layer = tf.keras.layers.LocallyConnected2D(1, kernel_size=(1, 1),
                                               activation='linear',
                                               kernel_initializer='he_normal',
                                               use_bias=False)

new_model = insert_intermediate_layer_in_keras(model, new_layer, 1)
new_model.compile(loss=tf.keras.losses.categorical_crossentropy,
                  optimizer=tf.keras.optimizers.RMSprop(),
                  metrics=['accuracy'])
And finally I rerun training with my fake data:
X_fake = np.ones((60000,28,28,1))
print(Y_test.shape)
y_fake = np.ones((60000))
Y_fake = tf.keras.utils.to_categorical(y_fake, num_classes)
new_model.fit(X_fake, Y_fake, epochs=100)
weights = new_layer.get_weights()[0]
imshow(weights.reshape(28,28))
plt.show()
Results are not yet satisfying but I'm confident of the approach and guess I need to play around with the optimiser.
