I am creating a GRU to predict if data derived from traffic packets from a device is considered safe or anomalous. I plan to do this by training a model only on safe/ normal operating data and then having it check what it considers new unseen traffic to be (testing). I wish to only train on the safe data (one class) as an attack could take many forms and I don't want to train the model on labeled attack data and then have it miss an attack type that I didn't train it on (basically I want to overfit on the normal operating data). As such I need it to be able to check if the incoming unlabeled data matches the one class it has already trained on (i.e. does the incoming data match the normal operation of the device) or if it is anomalous.
The issue I am having is that as the model is being trained on only one class it is having trouble differentiating the anomalous unseen data from normal data and considers virtually all data that it sees as normal (same as the class it trained on).
As such I would appreciate it if anyone has any ideas or could point out flaws in the way I have implemented my model.
# Imports
import pandas as pd
import numpy as np
import torch
import torchvision # torch package for vision related things
import torch.nn.functional as F # Parameterless functions, like (some) activation functions
import torchvision.datasets as datasets # Standard datasets
import torchvision.transforms as transforms # Transformations we can perform on our dataset for augmentation
from torch import optim # For optimizers like SGD, Adam, etc.
from torch import nn # All neural network modules
from torch.utils.data import Dataset, DataLoader # Gives easier dataset managment by creating mini batches etc.
from tqdm import tqdm # For a nice progress bar
from sklearn.preprocessing import StandardScaler
# Set device: use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Hyperparameters
input_size = 24  # feature columns per sample (MyDataset reads columns 0-23)
hidden_size = 128  # width of the GRU hidden state
num_layers = 1  # number of stacked GRU layers
num_classes = 2  # output width of the final linear layer
sequence_length = 1  # time steps per sample; forward() adds a length-1 time axis
learning_rate = 0.005  # learning rate handed to the Adam optimizer
batch_size = 8  # samples per DataLoader mini-batch
num_epochs = 5  # full passes over the training loader
# Recurrent neural network with GRU (many-to-one)
class RNN_GRU(nn.Module):
    """GRU classifier that treats every input row as a one-step sequence.

    The input batch of shape (batch, input_size) gets a time axis of length 1,
    is run through the GRU, and the output of the last time step is mapped to
    ``num_classes`` scores by a linear layer.

    Args:
        input_size: number of features per sample.
        hidden_size: width of the GRU hidden state.
        num_layers: number of stacked GRU layers.
        num_classes: number of output scores per sample.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        # forward() keeps only the last time step, so the head always receives
        # exactly ``hidden_size`` features, independent of the sequence length.
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes) for ``x`` of shape (batch, input_size)."""
        # Insert the time axis: (batch, input_size) -> (batch, 1, input_size)
        x = x.unsqueeze(1)
        # Initial hidden state (a GRU has no cell state), created on the same
        # device/dtype as the input so the module does not rely on a global.
        h0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size,
            device=x.device, dtype=x.dtype,
        )
        out, _ = self.gru(x, h0)
        # Decode the top layer's output at the last time step
        return self.fc(out[:, -1, :])
class MyDataset(Dataset):
    """Dataset backed by a CSV file of feature columns followed by a label column.

    Args:
        file_name: path of the CSV file to read.
        num_features: number of leading columns used as features; the column
            right after them is used as the label. Defaults to 24.
    """

    def __init__(self, file_name, num_features=24):
        stats_df = pd.read_csv(file_name)
        x = stats_df.iloc[:, :num_features].values
        y = stats_df.iloc[:, num_features].values
        # Both features and labels are stored as float32 tensors
        self.x_train = torch.tensor(x, dtype=torch.float32)
        self.y_train = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        """Return the number of rows (samples)."""
        return len(self.y_train)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.x_train[idx], self.y_train[idx]
# Datasets and loaders: "nomStats.csv" feeds training, "atkStats.csv" feeds testing
nomDs = MyDataset("nomStats.csv")
atkDs = MyDataset("atkStats.csv")
train_loader = DataLoader(dataset=nomDs, batch_size=batch_size)
test_loader = DataLoader(dataset=atkDs, batch_size=batch_size)

# Build the network and move it to the selected device
model = RNN_GRU(input_size, hidden_size, num_layers, num_classes).to(device)

# Cross-entropy objective, optimised with Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    for batch_features, batch_labels in tqdm(train_loader):
        # Move the batch to the model's device
        batch_features = batch_features.to(device=device).squeeze(1)
        # CrossEntropyLoss is given the labels as integer class indices
        batch_labels = batch_labels.to(device=device, dtype=torch.long)

        # Forward pass
        logits = model(batch_features)
        loss = criterion(logits, batch_labels)

        # Backward pass followed by one Adam step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
# Check accuracy on training & test to see how good our model
def check_accuracy(loader, model):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly.

    Args:
        loader: iterable of ``(inputs, labels)`` batches.
        model: module that maps a batch of inputs to per-class scores.

    Returns:
        A 0-dim float tensor holding ``correct / total``; ``0.0`` when the
        loader yields no samples (instead of raising ZeroDivisionError).

    The model is evaluated in eval mode under ``torch.no_grad()`` and its
    previous train/eval mode is restored afterwards.
    """
    was_training = model.training
    # Evaluate on whatever device the model lives on rather than a global
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    num_correct = torch.zeros((), dtype=torch.long, device=target_device)
    num_samples = 0

    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=target_device).squeeze(1)
                y = y.to(device=target_device)
                # Predicted class = index of the highest score in each row
                predictions = model(x).argmax(dim=1)
                num_correct += (predictions == y).sum()
                num_samples += predictions.size(0)
    finally:
        # Restore the mode the caller had, even if evaluation failed
        model.train(was_training)

    if num_samples == 0:
        return torch.zeros((), device=target_device)
    return num_correct / num_samples
# Accuracy (as a percentage) on the training loader, then on the test loader
train_accuracy = check_accuracy(train_loader, model) * 100
print(f"Accuracy on training set: {train_accuracy:.2f}%")
test_accuracy = check_accuracy(test_loader, model) * 100
print(f"Accuracy on test set: {test_accuracy:.2f}%")
Related
I've trained a simple neural net using skorch to make it sklearn compatible and I would like to know how to retrieve the actual estimated weights.
Here's a replicable example of what I need.
The neural net presented here uses 10 features, has one hidden layer of 2 nodes, uses ReLu activation functions and linearly combines the output of the 2 nodes.
import torch
import numpy as np
from torch.autograd import Variable
# Build a reproducible toy regression problem
np.random.seed(2022)
train_size = 1000
n_features = 10
# Features are stored column-wise: one column per training sample
X_train = np.random.rand(n_features, train_size).astype("float32")
# Two random coefficient rows; the target is the sum of both linear maps
l2_params_1 = np.random.rand(1, n_features).astype("float32")
l2_params_2 = np.random.rand(1, n_features).astype("float32")
l1_X = l2_params_1 @ X_train
l2_X = l2_params_2 @ X_train
y_train = l1_X + l2_X
# Defining my NN
class NNModule(torch.nn.Module):
    """Two-layer regressor: Linear(in_features, 2) -> ReLU -> Linear(2, 1)."""

    def __init__(self, in_features):
        super().__init__()
        self.l1 = torch.nn.Linear(in_features, 2)
        self.a1 = torch.nn.ReLU()
        self.l2 = torch.nn.Linear(2, 1)

    def forward(self, x):
        """Apply the hidden layer, the ReLU and the output layer in sequence."""
        hidden = self.a1(self.l1(x))
        return self.l2(hidden)
# Initialize the NN
torch.manual_seed(200)
model = NNModule(in_features=10)
# Overwrite the first layer's weights and bias with draws from U(0, 1)
model.l1.weight.data.uniform_(0.0, 1.0)
model.l1.bias.data.uniform_(0.0, 1.0)
# Define criterion and optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
# Train the NN
torch.manual_seed(200)
# The training tensors never change, so they are built once outside the loop.
# Plain tensors are used directly: the Variable wrapper is deprecated and
# tensors already support autograd.
inputs = torch.from_numpy(np.transpose(X_train))
labels = torch.from_numpy(np.transpose(y_train))
for epoch in range(100):
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
The parameters at which I'm arriving are the following:
list(model.parameters())
[Output]:
[Parameter containing:
tensor([[0.8997, 0.8345, 0.8284, 0.6950, 0.5949, 0.1217, 0.9067, 0.1824, 0.8272,
0.2372],
[0.7525, 0.6577, 0.4358, 0.6109, 0.8817, 0.5429, 0.5263, 0.7531, 0.1552,
0.7066]], requires_grad=True),
Parameter containing:
tensor([0.6617, 0.1079], requires_grad=True),
Parameter containing:
tensor([[0.9225, 0.8339]], requires_grad=True),
Parameter containing:
tensor([0.0786], requires_grad=True)]
Now, to wrap my NNModule with skorch, I'm using this:
from skorch import NeuralNetRegressor

torch.manual_seed(200)
# Same module, loss, optimizer, learning rate and epoch count as the manual loop
regressor_settings = dict(
    module=NNModule(in_features=10),
    criterion=torch.nn.MSELoss,
    optimizer=torch.optim.SGD,
    optimizer__lr=0.01,
    max_epochs=100,
    verbose=0,
)
net = NeuralNetRegressor(**regressor_settings)
# Both arrays are transposed before fitting
net.fit(np.transpose(X_train), np.transpose(y_train))
And I'd like to retrieve the weights obtained in the training. I've used dir(net) to see if the weights are stored in any attributes to no avail.
To retrieve the weights one needs to output them like this:
# ``module_`` (trailing underscore) is the module instance that skorch
# initialised and trained during ``fit``; ``module`` is only the constructor
# argument and is not guaranteed to be the trained object.
list(net.module_.parameters())
I am creating a GRU to do some classification for a project, and I'm relatively new to PyTorch and to implementing GRUs. I know similar questions have been answered already, but I can't seem to carry the same solution over to my own problem. I understand that there is an issue with the shape/order of my fc arrays, but after trying to change things I can no longer see the wood for the trees. I would appreciate it if someone could point me in the right direction.
Below I have attached my code and the error. The datasets I'm using contain 24 features with a label in the 25th column.
# Imports
import pandas as pd
import numpy as np
import torch
import torchvision # torch package for vision related things
import torch.nn.functional as F # Parameterless functions, like (some) activation functions
import torchvision.datasets as datasets # Standard datasets
import torchvision.transforms as transforms # Transformations we can perform on our dataset for augmentation
from torch import optim # For optimizers like SGD, Adam, etc.
from torch import nn # All neural network modules
from torch.utils.data import Dataset, DataLoader # Gives easier dataset managment by creating mini batches etc.
from tqdm import tqdm # For a nice progress bar
from sklearn.preprocessing import StandardScaler
# Set device: use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Hyperparameters
input_size = 24  # feature columns per sample (MyDataset reads columns 0-23)
hidden_size = 128  # width of the GRU hidden state
num_layers = 1  # number of stacked GRU layers
num_classes = 2  # output width of the final linear layer
sequence_length = 1  # nominal time steps per sample
learning_rate = 0.005  # learning rate handed to the Adam optimizer
batch_size = 8  # samples per DataLoader mini-batch
num_epochs = 3  # full passes over the training loader
# Recurrent neural network with GRU (many-to-one)
class RNN_GRU(nn.Module):
    """GRU classifier that treats every input row as a one-step sequence.

    The input batch of shape (batch, input_size) gets a time axis of length 1,
    is run through the GRU, and the output of the last time step is mapped to
    ``num_classes`` scores by a linear layer.

    Args:
        input_size: number of features per sample.
        hidden_size: width of the GRU hidden state.
        num_layers: number of stacked GRU layers.
        num_classes: number of output scores per sample.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        # Only the last time step is decoded, so the head takes hidden_size inputs
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes) for ``x`` of shape (batch, input_size)."""
        # With batch_first=True the GRU expects (batch, seq, feature), so the
        # time axis goes at position 1. Inserting it at position 0 would make
        # the whole batch one long sequence and, once flattened, produce a
        # (1, batch * hidden_size) matrix that the linear layer rejects.
        x = x.unsqueeze(1)
        # Initial hidden state (a GRU has no cell state), on the input's device
        h0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size,
            device=x.device, dtype=x.dtype,
        )
        # Forward propagate GRU
        out, _ = self.gru(x, h0)
        # Decode the top layer's output at the last time step
        return self.fc(out[:, -1, :])
class MyDataset(Dataset):
    """Dataset backed by a CSV file of feature columns followed by a label column.

    Args:
        file_name: path of the CSV file to read.
        num_features: number of leading columns used as features; the column
            right after them is used as the label. Defaults to 24.
    """

    def __init__(self, file_name, num_features=24):
        stats_df = pd.read_csv(file_name)
        x = stats_df.iloc[:, :num_features].values
        y = stats_df.iloc[:, num_features].values
        # Both features and labels are stored as float32 tensors
        self.x_train = torch.tensor(x, dtype=torch.float32)
        self.y_train = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        """Return the number of rows (samples)."""
        return len(self.y_train)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.x_train[idx], self.y_train[idx]
# Datasets and loaders: "nomStats.csv" feeds training, "atkStats.csv" feeds testing
nomDs = MyDataset("nomStats.csv")
atkDs = MyDataset("atkStats.csv")
train_loader = DataLoader(dataset=nomDs, batch_size=batch_size)
test_loader = DataLoader(dataset=atkDs, batch_size=batch_size)
# Initialize network (try out just using simple RNN, or GRU, and then compare with LSTM)
model = RNN_GRU(input_size, hidden_size, num_layers, num_classes).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Train Network
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(tqdm(train_loader)):
        # Get data to cuda if possible
        data = data.to(device=device).squeeze(1)
        # MyDataset stores labels as float32, but CrossEntropyLoss needs
        # integer class indices for 1-D targets, so cast them to long here.
        targets = targets.to(device=device, dtype=torch.long)
        # forward
        scores = model(data)
        loss = criterion(scores, targets)
        # backward
        optimizer.zero_grad()
        loss.backward()
        # gradient descent update step/adam step
        optimizer.step()
# Check accuracy on training & test to see how good our model
def check_accuracy(loader, model):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly.

    Args:
        loader: iterable of ``(inputs, labels)`` batches.
        model: module that maps a batch of inputs to per-class scores.

    Returns:
        A 0-dim float tensor holding ``correct / total``; ``0.0`` when the
        loader yields no samples (instead of raising ZeroDivisionError).

    The model is evaluated in eval mode under ``torch.no_grad()`` and its
    previous train/eval mode is restored afterwards.
    """
    was_training = model.training
    # Evaluate on whatever device the model lives on rather than a global
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    num_correct = torch.zeros((), dtype=torch.long, device=target_device)
    num_samples = 0

    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=target_device).squeeze(1)
                y = y.to(device=target_device)
                # Predicted class = index of the highest score in each row
                predictions = model(x).argmax(dim=1)
                num_correct += (predictions == y).sum()
                num_samples += predictions.size(0)
    finally:
        # Restore the mode the caller had, even if evaluation failed
        model.train(was_training)

    if num_samples == 0:
        return torch.zeros((), device=target_device)
    return num_correct / num_samples
# Report both accuracies as percentages with two decimals. The original
# training-set line used ":2f" (minimum width 2, default six decimals)
# where ":.2f" (two decimals) was intended.
print(f"Accuracy on training set: {check_accuracy(train_loader, model)*100:.2f}")
print(f"Accuracy on test set: {check_accuracy(test_loader, model)*100:.2f}")
Traceback (most recent call last):
File "TESTGRU.py", line 87, in <module>
scores = model(data)
File "C:\Users\steph\anaconda3\envs\FYP\lib\site-packages\torch\nn\modules\module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "TESTGRU.py", line 47, in forward
out = self.fc(out)
File "C:\Users\steph\anaconda3\envs\FYP\lib\site-packages\torch\nn\modules\module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Users\steph\anaconda3\envs\FYP\lib\site-packages\torch\nn\modules\linear.py", line 94, in forward
return F.linear(input, self.weight, self.bias)
File "C:\Users\steph\anaconda3\envs\FYP\lib\site-packages\torch\nn\functional.py", line 1753, in linear
return torch._C._nn.linear(input, weight, bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x1024 and 128x2)
It seems like these lines
# Forward propagate LSTM
out, _ = self.gru(x, h0)
out = out.reshape(out.shape[0], -1)
are the problem.
It appears that you only want to feed the hidden state of the last time step.
This could be read from the output in two ways:
If you want the output of all layers at the last time step, you should use the second return value of out, _ = self.gru(x, h0) not the first.
If you want to use just the last layer's output at the last time step (which seems to be the case), you should use
out[:, -1, :]. With this change, you may not need the
reshape operation.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms, datasets
#neural network class
class Net(nn.Module):
    """Fully connected classifier: 784 -> 64 -> 64 -> 64 -> 10, log-softmax output."""

    def __init__(self):
        super().__init__()
        # Input layer takes a flattened 28*28 image and emits 64 features
        self.fc1 = nn.Linear(28 * 28, 64)
        # Two further 64 -> 64 layers
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 64)
        # Output layer emits one score per class (10 outputs)
        self.fc4 = nn.Linear(64, 10)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of flattened images."""
        hidden = x
        # ReLU after each of the first three layers
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(layer(hidden))
        # Log-softmax over the class dimension
        return F.log_softmax(self.fc4(hidden), dim=1)
# Instantiate the model
net = Net()

# One random 28x28 input, reshaped to (batch, height, width)
x = torch.rand(28, 28)
x = x.view(-1, 28, 28)

import torch.optim as optim

# Adam updates every trainable parameter of the network (learning rate 0.001)
optimizer = optim.Adam(net.parameters(), lr=0.001)

# MNIST train and test splits; each image is converted to a tensor
train = torchvision.datasets.MNIST(
    "", train=True, download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)
test = torchvision.datasets.MNIST(
    "", train=False, download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

# Loaders deliver shuffled mini-batches of 10 samples
trainset = torch.utils.data.DataLoader(train, batch_size=10, shuffle=True)
testset = torch.utils.data.DataLoader(test, batch_size=10, shuffle=True)

# One epoch is a full pass over the training loader
EPOCHS = 3
for epoch in range(EPOCHS):
    for features, labels in trainset:
        # Clear gradients left over from the previous batch
        net.zero_grad()
        # Flatten each image to a 784-long row before the forward pass
        output = net(features.view(-1, 28 * 28))
        # Negative log-likelihood on the log-softmax output
        loss = F.nll_loss(output, labels)
        print("loss is the ", loss)
        # Backpropagate, then apply the optimizer update
        loss.backward()
        optimizer.step()
I am using Pytorch on Google colab. The error message says that the gradient isn't there.
I am unsure where the error stems from? I used this tutorial: https://www.youtube.com/watch?v=9j-_dOze4IM&list=PLQVvvaa0QuDdeMyHEYc0gxFpYwHY2Qfdh&index=4
Error message from Google colab
Error message
Here is an autoencoder I’m working on from tutorial:https://debuggercafe.com/implementing-deep-autoencoder-in-pytorch/
I’m just learning about autoencoders and I’ve modified the source encode a custom small dataset which consists of:
[0,1,0,1,0,1,0,1,0],[0,1,1,0,0,1,0,1,0],[0,1,1,0,0,1,0,1,0],[0,1,1,0,0,1,0,1,0]
It seems to work ok, but I’m unsure how to access the lower dimensional embedding values of dimension 2 (set by parameter out_features).
I've added a method to the Autoencoder class to return the embedding; is this the recommended way of accessing the embeddings?
Code:
import torch
import torchvision
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
from torchvision.utils import save_image
import warnings
import os
# import packages
import os
import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
import torch.optim as optim
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torchvision import datasets
from torch.utils.data import DataLoader
from torchvision.utils import save_image
import numpy as np
# utility functions
def get_device():
    """Return 'cuda:0' when CUDA is available, otherwise 'cpu'."""
    return 'cuda:0' if torch.cuda.is_available() else 'cpu'


device = get_device()
# Four 9-element rows, converted to one float tensor of shape (4, 9)
features = torch.tensor(
    np.array([
        [0, 1, 0, 1, 0, 1, 0, 1, 0],
        [0, 1, 1, 0, 0, 1, 0, 1, 0],
        [0, 1, 1, 0, 0, 1, 0, 1, 0],
        [0, 1, 1, 0, 0, 1, 0, 1, 0],
    ])
).float()
# One row per batch, visited in shuffled order
tic_tac_toe_data_loader = torch.utils.data.DataLoader(
    features, batch_size=1, shuffle=True
)
class Encoder(nn.Module):
    """Map a 9-feature input to a 2-dimensional embedding squashed into (0, 1)."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=9, out_features=2)

    def forward(self, x):
        """Return ``sigmoid(fc1(x))``."""
        # torch.sigmoid is used instead of the deprecated F.sigmoid alias
        return torch.sigmoid(self.fc1(x))
class Decoder(nn.Module):
    """Map a 2-dimensional embedding back to 9 outputs squashed into (0, 1)."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=2, out_features=9)

    def forward(self, x):
        """Return ``sigmoid(fc1(x))``."""
        # torch.sigmoid is used instead of the deprecated F.sigmoid alias
        return torch.sigmoid(self.fc1(x))
class Autoencoder(nn.Module):
    """Chain an Encoder (stored as ``fc1``) and a Decoder (stored as ``fc2``)."""

    def __init__(self):
        super().__init__()
        self.fc1 = Encoder()
        self.fc2 = Decoder()

    def forward(self, x):
        """Encode ``x`` and return the decoder's reconstruction."""
        embedding = self.fc1(x)
        return self.fc2(embedding)
# Build the autoencoder and move it to the selected device
net = Autoencoder()
net.to(device)

# Training configuration
NUM_EPOCHS, LEARNING_RATE = 50, 1e-3
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE)

# image transformations
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)
outputs = None
def train(net, trainloader, NUM_EPOCHS):
    """Train ``net`` to reconstruct its inputs and return the mean loss per epoch.

    Uses the module-level ``device``, ``optimizer`` and ``criterion``.

    Args:
        net: model called on each flattened batch.
        trainloader: iterable of input batches; must support ``len()``.
        NUM_EPOCHS: number of passes over ``trainloader``.

    Returns:
        List with one average batch loss per epoch.
    """
    epoch_losses = []
    for _ in range(NUM_EPOCHS):
        loss_sum = 0.0
        for batch in trainloader:
            # Move to the device and flatten everything but the batch axis
            flat = batch.to(device)
            flat = flat.view(flat.size(0), -1)
            optimizer.zero_grad()
            reconstruction = net(flat)
            # The input itself is the reconstruction target
            batch_loss = criterion(reconstruction, flat)
            batch_loss.backward()
            optimizer.step()
            loss_sum += batch_loss.item()
        epoch_losses.append(loss_sum / len(trainloader))
    return epoch_losses


# train the network
train_loss = train(net, tic_tac_toe_data_loader, NUM_EPOCHS)
I can access the lower dimensional embedding using
# NOTE: Encoder() constructs a brand-new module here, so the input is run through
# a newly created fc1 layer, not the encoder held by the trained `net`.
print(Encoder().forward( torch.tensor(np.array([0,1,0,1,0,1,0,1,0])).float()))
But is this using the trained weight values for the embedding ? If I call Encoder multiple times with same values:
# Each line builds its own Encoder instance, so the two calls use two
# separately created fc1 layers.
print(Encoder().forward( torch.tensor(np.array([0,1,0,1,0,1,0,1,0])).float()))
print(Encoder().forward( torch.tensor(np.array([0,1,0,1,0,1,0,1,0])).float()))
different results are returned:
tensor([0.5083, 0.5020], grad_fn=<SigmoidBackward>)
tensor([0.4929, 0.6940], grad_fn=<SigmoidBackward>)
Why is this the case ? Is an extra training step being invoked as a result of calling Encoder ?
By calling Encoder() you are creating a new instance of the encoder every time, and its weights are randomly initialized each time.
Generally, you make one instance of it and train it, save the weights, and infer on it.
Also, for PyTorch, you need not call .forward(), but call the instance directly. Forward is called by it implicitly, including other hook methods if any.
# Create the encoder once and reuse the same instance for every call
enc = Encoder()
# from_numpy yields an integer tensor; .float() has to be applied to that
# tensor (a NumPy array has no .float() method). The name `sample` avoids
# shadowing the built-in input().
sample = torch.from_numpy(np.asarray([0, 1, 0, 1, 0, 1, 0, 1, 0])).float()
# Calling the instance runs forward(); both calls go through the same weights
print(enc(sample))
print(enc(sample))
A training pass happens only when you pass the Encoder instance to the train function. Calling Encoder() only creates a new object.
Since each object has its own weights, and the weights are initialized randomly (see Xavier and Kaiming initialization), you are getting different outputs. After moving to a single object, you still have to train it explicitly with the train function.
As the other answer pointed out, when you call Encoder() you generate a new instance with randomly initialized weights. Because you are interested in the lower-dimensional embedding produced by your encoder, you need to use the encoder that lives inside your trained net:
# Autoencoder stores its Encoder as the attribute `fc1`; reuse that instance
trained_encoder = net.fc1
Now that you have your encoder with trained weights, the following lines should produce the same result:
# Both lines call forward() on the same encoder instance with the same input
print(trained_encoder.forward( torch.tensor(np.array([0,1,0,1,0,1,0,1,0])).float()))
print(trained_encoder.forward( torch.tensor(np.array([0,1,0,1,0,1,0,1,0])).float()))
As pointed out by others you can further simplify by passing input directly:
# Build the float input once, then call the encoder instance directly
test_input = torch.tensor(np.array([0,1,0,1,0,1,0,1,0])).float()
print(trained_encoder(test_input))
Can we input a continuous video which contains sequences of both positive classes and negative classes to train LSTM, on several thousand of such videos?
My overall objective is to mark videos realtime with particular
scenes(e.g. if I’ve 0-100 frames and frame number 30-60 contains some
yoga scenes, I need to mark them)
Right now the approach which I’m following is to split the video into two categories of positive sequences and negative sequences and train LSTM (on top of Mobnet CNN, FC replaced by LSTM layers).
But somehow this does not give any improvement compared to Mobnet
alone when we run evaluation on non-split videos.
Both Mobnet and LSTM are trained separately. I save output of Mobnet(FC removed) in numpy arrays and then read these arrays for training LSTM.
Here is the sample of code used for this approach:
epochs = 250  # number of passes over the data loader in train()
batch_size = 128  # samples per mini-batch handed to the DataLoader
class Model(nn.Module):
    """Two-layer LSTM over 1024-feature steps, followed by a 2-way linear head."""

    def __init__(self):
        super().__init__()
        in_size, hidden_size = 1024, 512
        layer_no, classes_no = 2, 2
        self.lstm = nn.LSTM(in_size, hidden_size, layer_no, batch_first=True)
        self.linear = nn.Linear(hidden_size, classes_no)

    def forward(self, input_seq):
        """Return class scores computed from the LSTM output at the last time step."""
        per_step_outputs = self.lstm(input_seq)[0]
        # batch_first=True, so index -1 on axis 1 selects the final step
        return self.linear(per_step_outputs[:, -1])
def nploader(npfile):
    """Load and return the array stored in the NumPy file ``npfile``."""
    return np.load(npfile)
def train():
    """Train the LSTM ``Model`` on ``.npy`` feature files and save its weights.

    Reads samples from ``./featrs/`` via ``DatasetFolder`` (loaded with
    ``nploader``), trains on the GPU with SGD and a ``StepLR`` schedule for
    ``epochs`` epochs, and writes the state dict to ``lstm.ckpt``.
    """
    npdataloader = torchvision.datasets.DatasetFolder(
        './featrs/', nploader, ('npy',), transform=None, target_transform=None)
    # Shuffle the training samples: DatasetFolder lists them class folder by
    # class folder, so unshuffled batches would mostly hold a single class.
    data_loader = torch.utils.data.DataLoader(npdataloader,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=1)
    model = Model().cuda()
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.8)
    model.train()
    for epoch in range(0, epochs):
        for input_seq, target in data_loader:
            optimizer.zero_grad()
            # The model already lives on the GPU, so its output does too
            output = model(input_seq.cuda())
            err = loss(output, target.cuda())
            err.backward()
            optimizer.step()
        # NOTE(review): the scheduler is stepped once per epoch here; the
        # original nesting was lost with the indentation - confirm.
        scheduler.step()
    # NOTE(review): saved once after all epochs - confirm against the original.
    torch.save(model.state_dict(), 'lstm.ckpt')