I am attempting to create an encoder/decoder model trained with mini-batches. I keep encountering an error stating:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [32, 6]], which is output 0 of AsStridedBackward0, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
The traceback points to y = self.linear(out), but I am unsure what exactly is wrong. Any help would be greatly appreciated. Below is the model. Thank you.
import torch
import torch.nn as nn
import numpy as np
from torch.autograd import Variable
from sliding_window import sliding_window
from training_datasets import get_training_datasets_batch
torch.autograd.set_detect_anomaly(True)
class Encoder(nn.Module):
def __init__(self, input_size, hidden_size, num_layers=1):
super(Encoder, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.num_layers = num_layers
self.gru = nn.GRU(input_size=input_size, hidden_size=hidden_size,num_layers=num_layers,batch_first=True)
def forward(self, x):
flat = x.view(x.shape[0], x.shape[1], self.input_size)
out,h = self.gru(flat)
return out, h
class Decoder(nn.Module):
def __init__(self, input_size, hidden_size, output_size=6, num_layers=1):
super(Decoder, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.num_layers = num_layers
self.output_size = output_size
self.gru = nn.GRU(input_size=input_size,hidden_size=hidden_size,num_layers=num_layers,batch_first=True)
self.linear = nn.Linear(hidden_size, output_size)
self.ReLU = nn.ReLU()
def forward(self, x, h):
x = x.unsqueeze(1)
out, h = self.gru(x, h)
out = out.squeeze(1)
print(out.shape)
y = self.linear(out)
print(y.shape)
y = self.ReLU(y)
return y,h
class EncoderDecoder(nn.Module):
def __init__(self, hidden_size, input_size=6, output_size=6):
super(EncoderDecoder, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.encoder = Encoder(input_size=input_size, hidden_size=hidden_size)
self.decoder = Decoder(input_size=input_size, hidden_size=hidden_size, output_size=output_size)
def train_model(self, ts, epochs, target_len, features, batch_size=64, test_len=288, method = 'teacher_forcing', tfr = 0.5, lr = 0.01, dynamic_tf=False):
X,Y= sliding_window(ts, features=288, target_len=target_len)
x_train, x_val, x_test, y_train, y_val, y_test = get_training_datasets_batch(X,Y, features, test_len=test_len, batch_size=batch_size)
losses = np.full(epochs,np.nan)
optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, self.parameters()),
lr=lr)
criterion = nn.MSELoss()
for e in range(epochs):
print('Starting epoch {}'.format(e))
x_train_data = iter(x_train)
y_train_data = iter(y_train)
x_val_data = iter(x_val)
y_val_data = iter(y_val)
x_train_shape = list(x_train)[0].shape
# predicted = torch.zeros(target_len,batch_size,x_train_shape[2])
# print(predicted.shape)
loss=0
for x_train_in in x_train_data:
optimizer.zero_grad()
x_train_in = Variable(x_train_in)
y_train_in = Variable(next(y_train_data).transpose(0,1))
_, enc_h = self.encoder(x_train_in)
dec_in = x_train_in[:,-1,:]
dec_h = enc_h
if method == 'recursive':
for t in range(target_len):
dec_out, dec_h = self.decoder(dec_in, dec_h)
predicted = dec_out
dec_in = dec_out
loss += criterion(predicted,y_train_in[t])
loss.backward(retain_graph=True)
optimizer.step()
The problem in this case was the loss.backward(retain_graph=True). The code started working after removing retain_graph=True and adding the line loss = 0 after the optimizer step: the loss tensor otherwise keeps accumulating across batches and needs to be reset.
loss.backward()
optimizer.step()
loss=0
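For reference, here is a minimal sketch of the corrected inner loop as a whole, using the same variable names as the training loop above (with the loss reset per batch, retain_graph is no longer needed):

for x_train_in in x_train_data:
    optimizer.zero_grad()
    y_train_in = next(y_train_data).transpose(0, 1)
    _, enc_h = self.encoder(x_train_in)
    dec_in = x_train_in[:, -1, :]
    dec_h = enc_h
    loss = 0  # reset the accumulated loss for every batch
    if method == 'recursive':
        for t in range(target_len):
            dec_out, dec_h = self.decoder(dec_in, dec_h)
            dec_in = dec_out
            loss += criterion(dec_out, y_train_in[t])
    loss.backward()  # plain backward; the graph is rebuilt each batch
    optimizer.step()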
I am trying to fight overfitting, which is why I decided to look through the documentation (https://pytorch-lightning.readthedocs.io/en/stable/common/evaluation_basic.html#train-with-the-validation-loop), where I found that you can pass both a training and a validation dataloader to Trainer.fit. The question is: should I use this method, or can I simply pass the dataloader class to Trainer.fit to prevent overfitting?
Code DataLoader:
class ClassifierDataModule(pl.LightningDataModule):
def __init__(self, train_dataset:pd.DataFrame, val_dataset:pd.DataFrame, batch_size:int):
super().__init__()
self.prepare_data_per_node = False
self.train_dataset = train_dataset
self.val_dataset = val_dataset
self.batch_size=batch_size
def train_dataloader(self):
return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=os.cpu_count())
def val_dataloader(self):
return DataLoader(self.val_dataset, batch_size=self.batch_size, shuffle=True, num_workers=os.cpu_count())
data_module_classifier = ClassifierDataModule(train_dataset, val_dataset, BATCH_SIZE)
And here is my Trainer.fit():
model = MulticlassClassificationLIGHT(class_weights)
#trainer.fit(model, data_module_classifier) # SHOULD I USE THIS METHOD TO PREVENT OVERFITTING
trainer.fit(model, data_module_classifier.train_dataloader(),data_module_classifier.val_dataloader() ) # OR THIS ONE ?
My LightningModule just in case:
class MulticlassClassificationLIGHT(pl.LightningModule):
def __init__(self,class_weights):
super(MulticlassClassificationLIGHT, self).__init__()
self.num_feature=35
self.num_class=36
self.layer_1 = nn.Linear(self.num_feature, 512)
self.layer_2 = nn.Linear(512, 128)
self.layer_3 = nn.Linear(128, 64)
self.layer_out = nn.Linear(64, self.num_class)
self.relu = nn.ReLU()
self.dropout = nn.Dropout(p=0.2)
self.batchnorm1 = nn.BatchNorm1d(512)
self.batchnorm2 = nn.BatchNorm1d(128)
self.batchnorm3 = nn.BatchNorm1d(64)
self.loss = nn.CrossEntropyLoss(weight=class_weights.to(device))
def forward(self, x):
x = self.layer_1(x)
x = self.batchnorm1(x)
x = self.relu(x)
x = self.layer_2(x)
x = self.batchnorm2(x)
x = self.relu(x)
x = self.dropout(x)
x = self.layer_3(x)
x = self.batchnorm3(x)
x = self.relu(x)
x = self.dropout(x)
x = self.layer_out(x)
return x
def training_step(self, batch, batch_idx):
x, y = batch
logits = self.forward(x)
loss = self.loss(logits, y)
self.log("train_loss", loss, prog_bar=True, logger=True)
return loss
def validation_step(self, batch, batch_idx):
x, y = batch
logits = self.forward(x)
loss = self.loss(logits, y)
self.log("val_loss", loss, prog_bar=True, logger=True) # I ask Trainer to "ModelCheckpoint" this loss
return loss
Passing a validation dataloader during training does not fix overfitting. It allows you to measure how much the model is overfitting or underfitting: for a well-fit model, performance on the validation data should be close to performance on the training data.
Regarding the syntax, this should work:
trainer.fit(model=model, train_dataloaders=data_module_classifier.train_dataloader(), val_dataloaders=data_module_classifier.val_dataloader())
The documentation for fit is here: https://pytorch-lightning.readthedocs.io/en/latest/common/trainer.html#trainer-class-api
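Alternatively, since ClassifierDataModule is a LightningDataModule, you can pass it to fit directly and Lightning will call its train_dataloader() and val_dataloader() hooks for you. A minimal sketch using the objects defined above:

trainer.fit(model, datamodule=data_module_classifier)

Both forms run the validation loop during training; neither one prevents overfitting by itself.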
I keep getting the error TypeError: iteration over a 0-d tensor in the code below:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import pandas as pd
import base64
from torch import tensor
import itertools
import random
# Define the neural network model
class ModelClass(nn.Module):
def __init__(self, input_size, hidden_size, num_layers, output_size, device):
super(ModelClass, self).__init__()
self.device = device
self.hidden_size = hidden_size
self.num_layers = num_layers
self.rnn = nn.GRU(input_size, hidden_size, num_layers)
self.fc = nn.Linear(hidden_size, output_size)
def forward(self, input, hidden):
output, hidden = self.rnn(input, hidden)
output = self.fc(output)
return output, hidden
def init_hidden(self):
return torch.zeros(self.num_layers, 1, self.hidden_size, device=self.device)
def reward(self,r:int):
for p in self.parameters():
p.data += r * p.grad
padding_char = '\0'
def get_data():
file = open('./payloads/all.txt','r',errors='ignore')
training_data = file.readlines()
final = []
for t in training_data:
final.append(t.strip())
training_data = final
longest = len(max(training_data, key=len))
shortest = len(min(training_data, key=len))
char_to_int = {c: i for i, c in enumerate(sorted(set(''.join(training_data))))}
training_data = [[char_to_int[c] for c in string] for string in training_data]
return {'data': training_data, 'length': len(char_to_int), 'original': file.readlines(), 'long': longest, 'short': shortest}
def initialize():
data = get_data()
original_data = data['original']
input_size = data['length']
hidden_size = 256
num_layers = 8
minimum = data['short']
maximum = data['long']
output_size = random.randint(minimum,maximum)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = ModelClass(input_size, hidden_size, num_layers, output_size, device)
optimizer = optim.Adam(model.parameters())
loss_fn = nn.CrossEntropyLoss()
return model, optimizer, loss_fn
def int_train_model():
data = get_data()
original_data = data['original']
max_length = data['long']
shortest = data['short']
data['data'] = list(data['data'])
fin = []
for lis in data['data']:
for li in lis:
fin.append(li)
data['data'] = fin
data['data'] = torch.tensor(data['data'], dtype=torch.long)
print(data['data'])
model, optimizer, loss_fn = initialize()
num_epochs = 50
batch_size = 32
model.train()
for epoch in range(num_epochs):
hidden = model.init_hidden()
for input, label in data['data']:
optimizer.zero_grad()
input = input.view(-1, 1).to(device)
label = label.view(-1, 1).to(device)
output, hidden = model(input, hidden)
loss = loss_fn(output, label)
loss.backward()
optimizer.step()
print(f'Epoch {epoch+1}/{num_epochs}: Loss {loss.item():.4f}')
if __name__ == '__main__':
int_train_model()
I am having the model try to craft its own SQLi payloads using a wordlist called all.txt. Thank you in advance; I have very little experience with PyTorch, NumPy, and pandas, so if you could explain why the error happens and what the fix does, that would be great!
I use nn.BCEWithLogitsLoss(). When I train my model, the loss grows higher and higher. Why does this happen, and how can I solve it?
The fundamental code:
loss_fn = nn.BCEWithLogitsLoss()
def train_loop(dataloader, model, loss_fn, optimizer):
for batch, (X, y) in enumerate(dataloader):
#X,y=X.to(device), y.to(device)
m = nn.Sigmoid()
predict=model(X.float())
loss=loss_fn(m(predict),y.unsqueeze(1).float())
optimizer.zero_grad()
loss.backward()# Calculate Gradients
optimizer.step()# Update Weights
full code:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn as nn
class myDataset(Dataset):
def __init__(self,data,label):#, annotations_file, img_dir, transform=None, target_transform=None):
df = pd.read_csv(data, encoding='gbk')
df = df.fillna(value=0)
self.data = np.array(df)
df = pd.read_csv(label, encoding='gbk')
df = df.fillna(value=0)
self.label = np.array(df).reshape(-1)
#self.transform = transform
#self.target_transform = target_transform
def __len__(self):
return len(self.data)
def __getitem__(self, idx):
return self.data[idx], self.label[idx]
class Network(nn.Module):
def __init__(self):
super(Network, self).__init__()
self.flatten = nn.Flatten()
self.network = nn.Sequential(
#nn.Conv2d(in_channels=1, out_channels=6,kernel_size=5),
nn.Linear(27, 100),
nn.ReLU(),
nn.Linear(100, 512),
nn.ReLU(),
nn.Linear(512, 512),
nn.ReLU(),
nn.Linear(512, 1),
nn.ReLU()
)
def forward(self, x):
x=self.flatten(x)
return self.network(x)
def train_loop(dataloader, model, loss_fn, optimizer):
for batch, (X, y) in enumerate(dataloader):
#X,y=X.to(device), y.to(device)
m = nn.Sigmoid()
predict=model(X.float())
loss=loss_fn(m(predict),y.unsqueeze(1).float())
optimizer.zero_grad()
loss.backward()# Calculate Gradients
optimizer.step()# Update Weights
if batch % 100 == 0:
loss, current = loss.item(), batch * len(X)
print(f"loss: {loss:>7f} [{current:>5d}/{len(dataloader.dataset):>5d}]")
model=Network()
batch_size = 64
learning_rate = 1e-3
epochs = 5
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
trainDataloader = DataLoader(myDataset("mydata/traindata.csv","mydata/trainlabel.csv"),batch_size=batch_size,shuffle=True)
train_loop(trainDataloader, model, loss_fn, optimizer)
I think you don't need the line m = nn.Sigmoid().
The documentation says: "This loss combines a Sigmoid layer and the BCELoss in one single class." You can check the loss here.
def train_loop(dataloader, model, loss_fn, optimizer):
for batch, (X, y) in enumerate(dataloader):
#X,y=X.to(device), y.to(device)
predict=model(X.float())
loss=loss_fn(predict,y.unsqueeze(1).float())
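A related point that may also matter here: the last layer of Network is followed by nn.ReLU(), so the model can only output non-negative values, and the predicted probability after the implicit sigmoid can never drop below 0.5. Since BCEWithLogitsLoss expects raw, unbounded logits, I would remove that final activation as well. A minimal sketch of the adjusted network definition:

self.network = nn.Sequential(
    nn.Linear(27, 100),
    nn.ReLU(),
    nn.Linear(100, 512),
    nn.ReLU(),
    nn.Linear(512, 512),
    nn.ReLU(),
    nn.Linear(512, 1)  # raw logit: no activation before BCEWithLogitsLoss
)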
/pytorch/aten/src/ATen/native/cudnn/RNN.cpp:1266: UserWarning: RNN module weights are not part of single contiguous chunk of memory.
This means they need to be compacted at every call, possibly greatly increasing memory usage. To compact weights again call flatten_parameters().
Hello. I am using PyTorch. I am trying to use the DataParallel function, but the model is an LSTM. I am warned to flatten the model again, but I don't know when and where to do it. Can you let me know?
This is my model:
import torch.nn as nn
from torchvision import models
class ConvLstm(nn.Module):
def __init__(self, latent_dim, model, hidden_size, lstm_layers, bidirectional, n_class):
super(ConvLstm, self).__init__()
self.conv_model = Pretrained_conv(latent_dim, model)
self.Lstm = Lstm(latent_dim, hidden_size, lstm_layers, bidirectional)
self.output_layer = nn.Sequential(
nn.Linear(2 * hidden_size if bidirectional ==
True else hidden_size, n_class),
nn.Softmax(dim=-1)
)
def forward(self, x):
batch_size, timesteps, channel_x, h_x, w_x = x.shape
conv_input = x.view(batch_size * timesteps, channel_x, h_x, w_x)
conv_output = self.conv_model(conv_input)
lstm_input = conv_output.view(batch_size, timesteps, -1)
lstm_output = self.Lstm(lstm_input)
lstm_output = lstm_output[:, -1, :]
output = self.output_layer(lstm_output)
return output
class Pretrained_conv(nn.Module):
def __init__(self, latent_dim, model):
if model == 'resnet152':
super(Pretrained_conv, self).__init__()
self.conv_model = models.resnet152(pretrained=True)
# ====== freezing all of the layers ======
for param in self.conv_model.parameters():
param.requires_grad = False
# ====== changing the last FC layer to an output with the size we need. this layer is un freezed ======
self.conv_model.fc = nn.Linear(
self.conv_model.fc.in_features, latent_dim)
def forward(self, x):
return self.conv_model(x)
class Lstm(nn.Module):
def __init__(self, latent_dim, hidden_size, lstm_layers, bidirectional):
super(Lstm, self).__init__()
self.Lstm = nn.LSTM(latent_dim, hidden_size=hidden_size,
num_layers=lstm_layers, batch_first=True, bidirectional=bidirectional)
self.hidden_state = None
def reset_hidden_state(self):
self.hidden_state = None
def forward(self, x):
output, self.hidden_state = self.Lstm(x, self.hidden_state)
return output
The LSTM is run by executing the following code:
def foward_step(model, images, labels, criterion, mode=''):
model.module.Lstm.reset_hidden_state()
if mode == 'test':
with torch.no_grad():
output = model(images)
else:
output = model(images)
loss = criterion(output, labels)
# Accuracy calculation
predicted_labels = output.detach().argmax(dim=1)
acc = (predicted_labels == labels).cpu().numpy().sum()
return loss, acc, predicted_labels.cpu()
This is the main code:
model = nn.DataParallel(model, device_ids=[0,1,2,3]).cuda()
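Following the warning's own hint, a common pattern is to call flatten_parameters() at the start of the wrapper's forward pass, so each DataParallel replica compacts its RNN weights into one contiguous chunk before running. A minimal sketch of how the forward method of the Lstm class above could do this (an illustration of the warning's suggestion, not a verified fix for this exact setup):

def forward(self, x):
    # Compact the LSTM weights into one contiguous chunk of memory on this
    # replica before running; this is what the UserWarning asks for.
    self.Lstm.flatten_parameters()
    output, self.hidden_state = self.Lstm(x, self.hidden_state)
    return output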
I am relatively new to PyTorch and have been training an LSTM model. Any feedback on the code in general would be appreciated.
When I train the model, I receive the following warning:
UserWarning: Using a target size (torch.Size([4050, 1, 1])) that is
different to the input size (torch.Size([1])). This will likely lead
to incorrect results due to broadcasting. Please ensure they have the
same size.
Can anyone help me with this problem?
My goal is to train a model on a time series containing multiple features. Please see the code below:
LSTM:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from Tools.data_loader import CreateDataset
class LSTMModel(nn.Module):
def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
super(LSTMModel, self).__init__()
self.input_dim = input_dim
self.hidden_dim = hidden_dim
self.output_dim = output_dim
self.num_layers = num_layers
self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers)
self.forecast = nn.Linear(hidden_dim, output_dim)
def forward(self, x):
batch_size = 1
h0 = torch.zeros(self.num_layers, batch_size, self.hidden_dim).requires_grad_()
c0 = torch.zeros(self.num_layers, batch_size, self.hidden_dim).requires_grad_()
out, (hn,cn) = self.lstm(x, (h0.detach(), c0.detach()))
y_pred = self.forecast(out[-1].view(batch_size,-1))
return y_pred.view(-1)
dataset = CreateDataset('Data/Bloomberg_PV_weather.csv', 'Datetime', 0.8, 'Power') #PLACEHOLDER
input_dim = dataset.num_features
hidden_dim = 50
num_layers = 1
output_dim = 1
num_epochs = 50
X_train = dataset.X_train
y_train = dataset.y_train
X_test = dataset.X_test
y_test = dataset.y_test
model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim)
criterion = nn.MSELoss()
optimizer=torch.optim.Adam(model.parameters())
hist = np.zeros(num_epochs)
for epoch in range(num_epochs):
optimizer.zero_grad()
output=model(X_train)
loss = criterion(output,y_train)
if epoch % 100 == 0:
print('Epoch ', epoch, 'Loss: ',loss.item())
hist[epoch] = loss.item()
loss.backward()
optimizer.step()
Data Loader:
'''
Loads data and creates train and test sets
'''
import torch
import math
import pandas as pd
class CreateDataset():
def __init__(self, file, datetime_col, train_proportion, target):
self.file = file
self.datetime_col = datetime_col
self.train_proportion = train_proportion
self.target = target
self.data = self.load_data()
self.train_set, self.test_set = self.split_data()
self.X_train, self.y_train,self.X_test, self.y_test, self.num_features = self.reshape_data()
def load_data(self):
'''
Reads in data
'''
data = pd.read_csv(self.file, header=0)
data.drop(columns=self.datetime_col, inplace=True)
return data
def split_data(self):
'''
Creates test and train sets
'''
train_length = math.ceil(len(self.data)*self.train_proportion)
train_set = self.data[0:train_length]
test_set = self.data[train_length:]
return train_set, test_set
def reshape_data(self):
'''
Splits datasets into X and y sets and reshapes into 3D tensor
'''
num_features = (len(self.test_set.columns)-1)
y_train = torch.tensor(self.train_set[self.target].values).float()
X_train = torch.tensor(self.train_set.drop(columns=self.target).values).float()
y_test = torch.tensor(self.test_set[self.target].values).float()
X_test = torch.tensor(self.test_set.drop(columns=self.target).values).float()
X_train = X_train.view(-1,1,num_features)
y_train = y_train.view(-1,1,1)
X_test = X_test.view(-1,1,num_features)
y_test = y_test.view(-1,1,1)
return X_train, y_train, X_test, y_test, num_features