Issues with pytorch tensors iteration - python

I keep getting the error: TypeError: iteration over a 0-d tensor in the code bellow:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import pandas as pd
import base64
from torch import tensor
import itertools
import random
# Define the neural network model
class ModelClass(nn.Module):
def __init__(self, input_size, hidden_size, num_layers, output_size, device):
super(ModelClass, self).__init__()
self.device = device
self.hidden_size = hidden_size
self.num_layers = num_layers
self.rnn = nn.GRU(input_size, hidden_size, num_layers)
self.fc = nn.Linear(hidden_size, output_size)
def forward(self, input, hidden):
output, hidden = self.rnn(input, hidden)
output = self.fc(output)
return output, hidden
def init_hidden(self):
return torch.zeros(self.num_layers, 1, self.hidden_size, device=self.device)
def reward(self,r:int):
for p in self.model.parameters():
p.data += r * p.grad
padding_char = '\0'
def get_data():
file = open('./payloads/all.txt','r',errors='ignore')
training_data = file.readlines()
final = []
for t in training_data:
final.append(t.strip())
training_data = final
longest = len(max(training_data, key=len))
shortest = len(min(training_data, key=len))
char_to_int = {c: i for i, c in enumerate(sorted(set(''.join(training_data))))}
training_data = [[char_to_int[c] for c in string] for string in training_data]
return {'data': training_data, 'length': len(char_to_int), 'original': file.readlines(), 'long': longest, 'short': shortest}
def initialize():
data = get_data()
original_data = data['original']
input_size = data['length']
hidden_size = 256
num_layers = 8
minimum = data['short']
maximum = data['long']
output_size = random.randint(minimum,maximum)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = ModelClass(input_size, hidden_size, num_layers, output_size, device)
optimizer = optim.Adam(model.parameters())
loss_fn = nn.CrossEntropyLoss()
return model, optimizer, loss_fn
def int_train_model():
data = get_data()
original_data = data['original']
max_length = data['long']
shortest = data['short']
data['data'] = list(data['data'])
fin = []
for lis in data['data']:
for li in lis:
fin.append(li)
data['data'] = fin
data['data'] = torch.tensor(data['data'], dtype=torch.long)
print(data['data'])
model, optimizer, loss_fn = initialize()
num_epochs = 50
batch_size = 32
model.train()
for epoch in range(num_epochs):
hidden = model.init_hidden()
for input, label in data['data']:
optimizer.zero_grad()
input = input.view(-1, 1).to(device)
label = label.view(-1, 1).to(device)
output, hidden = model(input, hidden)
loss = loss_fn(output, label)
loss.backward()
optimizer.step()
print(f'Epoch {epoch+1}/{num_epochs}: Loss {loss.item():.4f}')
if __name__ == '__main__':
int_train_model()
I have the model try and craft it's own SQLi payloads using a wordlist called all.txt. Thank you in advance and I have very little experience with pytorch, numpy, and pandas. So if it would be possible to explain why the error happens and what the fix does that would be great!

Related

Using a target size (torch.Size([400])) that is different to the input size (torch.Size([400, 1]))

I'm currently switching from tensorflow to pytorch and facing the warning UserWarning: Using a target size (torch.Size([400])) that is different to the input size (torch.Size([400, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size
I came across that unsqueeze(1) on my target could help to resolve my problem, however, I do so obtain problems in regard of the multitarget which results from the shape my loss function (crossentropy) expects.
Here is a minimal example to my code:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.nn.functional as F
X1 = torch.randn(400, 1, 9999)
X2 = torch.randn((400,1, 9999))
aux1 = torch.randn(400,1)
aux2 = torch.randn(400,1)
aux3 = torch.randn(400,1)
y1 = torch.rand(400,)
y2 = torch.rand(400,)
y3 = torch.rand(400,)
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.nn.functional as F
# In[18]:
class MultiTaskDataset:
def __init__(self,
amplitude,
phase,
weight,
temperature,
humidity,
shelf_life_clf,
shelf_life_pred,
thickness_pred
):
self.amplitude = amplitude
self.phase = phase
self.weight = weight
self.temperature = temperature
self.humidity = humidity
self.shelf_life_clf = shelf_life_clf
self.shelf_life_pred = shelf_life_pred
self.thickness_pred = thickness_pred
def __len__(self):
return self.amplitude.shape[0]
def __getitem__(self, idx):
#inputs
amplitude = self.amplitude[idx]
phase = self.phase[idx]
weight = self.weight[idx]
temperature = self.temperature[idx]
humidity = self.humidity[idx]
#outputs
shelf_life_clf = self.shelf_life_clf[idx]
shelf_life_reg = self.shelf_life_pred[idx]
thickness_pred = self.thickness_pred[idx]
return ([torch.tensor(amplitude, dtype=torch.float32),
torch.tensor(phase, dtype=torch.float32),
torch.tensor(weight, dtype=torch.float32),
torch.tensor(temperature, dtype=torch.float32),
torch.tensor(humidity, dtype=torch.float32)],
[torch.tensor(shelf_life_clf, dtype=torch.long),
torch.tensor(shelf_life_reg, dtype=torch.float32),
torch.tensor(thickness_pred, dtype=torch.float32)])
# In[19]:
# train loader
dataset = MultiTaskDataset(X1, X2, aux1, aux2, aux3,
y1,y2,y3)
train_loader = DataLoader(dataset, batch_size=512, shuffle=True, num_workers=0)
# test loader
# In[20]:
class MyModel(nn.Module):
def __init__(self):
super(MyModel, self).__init__()
self.features_amp = nn.Sequential(
nn.LazyConv1d(1, 3, 1),
)
self.features_phase = nn.Sequential(
nn.LazyConv1d(1, 3, 1),
)
self.backbone1 = nn.Sequential(
nn.LazyConv1d(64,3,1),
nn.LazyConv1d(64,3,1),
nn.AvgPool1d(3),
nn.Dropout(0.25),
)
self.backbone2 = nn.Sequential(
nn.Conv1d(64, 32,3,1),
nn.Conv1d(32, 32,3,1),
nn.AvgPool1d(3),
nn.Dropout(0.25),
)
self.backbone3 = nn.Sequential(
nn.Conv1d(32, 16,3,1),
nn.Conv1d(16, 16,3,1),
nn.AvgPool1d(3),
nn.Dropout(0.25),
)
self.classifier = nn.LazyLinear(2)
self.shelf_life_reg = nn.LazyLinear(1)
self.thickness_reg = nn.LazyLinear(1)
def forward(self, x1, x2, aux1, aux2, aux3):
x1 = self.features_amp(x1)
x2 = self.features_phase(x2)
x1 = x1.view(x1.size(0),-1)
x2 = x2.view(x2.size(0),-1)
x = torch.cat((x1, x2), dim=-1)
print(x.size())
x = x.unsqueeze(1)
print(x.size())
x = self.backbone1(x)
print(x.size())
x = torch.flatten(x, start_dim=1, end_dim=-1)
x = torch.cat([x, aux1, aux2, aux3], dim=-1)
shelf_life_clf = self.classifier(x)
shelf_life_reg = self.shelf_life_reg(x)
thickness_reg = self.thickness_reg(x)
return (shelf_life_clf,
shelf_life_reg,
thickness_reg)
model = MyModel()
optimizer = optim.Adam(model.parameters(), lr=0.003)
criterion1 = nn.CrossEntropyLoss()
criterion2 = nn.MSELoss()
criterion3 = nn.MSELoss()
# In[21]:
def train(epoch):
model.train()
#exp_lr_scheduler.step()
arr_loss = []
#first_batch = next(iter(train_loader))
for batch_idx, (data, target) in enumerate(train_loader):
#amp, phase = data
clf, reg1, reg2 = target
#print(amp.shape, phase.shape)
#print(target[2].shape)
if torch.cuda.is_available():
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
data = [data[i].cuda() for i in range(len(data))]
target = [target[i].cuda() for i in range(len(target))]
model.to(device)
optimizer.zero_grad()
output1, output2, output3 = model(*data)
#losses
loss = criterion1(output1, target[0].long())
loss1 = criterion2(output2, target[1].float())
loss2 = criterion3(output3, target[2].float())
loss = loss + loss1 + loss2
#metrices
loss.backward()
optimizer.step()
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, (batch_idx + 1) * len(data), len(train_loader.dataset),
100. * (batch_idx + 1) / len(train_loader), loss.data))
arr_loss.append(loss.data)
return arr_loss
def averaged_accuracy(outputs, targets):
assert len(outputs) != len(targets), "number of outputs should equal the number of targets"
accuracy = []
for i in range(len(outputs)):
_, predicted = torch.max(output1.data, 1)
total += target[0].size(0)
correct += (predicted == target[0]).sum()
acc = correct / total *100
accuracy.append(acc)
return torch.mean(accuracy)
# In[22]:
optimizer = optim.Adam(model.parameters(), lr=0.00003)
criterion1 = nn.CrossEntropyLoss()
criterion2 = nn.MSELoss()
criterion3 = nn.MSELoss()
n_epochs = 10
for epoch in range(n_epochs):
train(epoch)
Can anybody provide guidance to resolve this problem?

Trying to compute the loss of an encoder/decoder model

I am attempting to create an encoder/decoder model with mini-batch. I continue to encounter an errors stating:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [32, 6]], which is output 0 of AsStridedBackward0, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
The traceback reveals something is wrong with the y=self.linear(out) but I am unsure what exactly. Any help would be greatly appreciated. Below is the model. Thank you.
import torch
import torch.nn as nn
import numpy as np
from torch.autograd import Variable
from sliding_window import sliding_window
from training_datasets import get_training_datasets_batch
torch.autograd.set_detect_anomaly(True)
class Encoder(nn.Module):
def __init__(self, input_size, hidden_size, num_layers=1):
super(Encoder, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.num_layers = num_layers
self.gru = nn.GRU(input_size=input_size, hidden_size=hidden_size,num_layers=num_layers,batch_first=True)
def forward(self, x):
flat = x.view(x.shape[0], x.shape[1], self.input_size)
out,h = self.gru(flat)
return out, h
class Decoder(nn.Module):
def __init__(self, input_size, hidden_size, output_size=6, num_layers=1):
super(Decoder, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.num_layers = num_layers
self.output_size = output_size
self.gru = nn.GRU(input_size=input_size,hidden_size=hidden_size,num_layers=num_layers,batch_first=True)
self.linear = nn.Linear(hidden_size, output_size)
self.ReLU = nn.ReLU()
def forward(self, x, h):
x = x.unsqueeze(1)
out, h = self.gru(x, h)
out = out.squeeze(1)
print(out.shape)
y = self.linear(out)
print(y.shape)
y = self.ReLU(y)
return y,h
class EncoderDecoder(nn.Module):
def __init__(self, hidden_size, input_size=6, output_size=6):
super(EncoderDecoder, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.encoder = Encoder(input_size=input_size, hidden_size=hidden_size)
self.decoder = Decoder(input_size=input_size, hidden_size=hidden_size, output_size=output_size)
def train_model(self, ts, epochs, target_len, features, batch_size=64, test_len=288, method = 'teacher_forcing', tfr = 0.5, lr = 0.01, dynamic_tf=False):
X,Y= sliding_window(ts, features=288, target_len=target_len)
x_train, x_val, x_test, y_train, y_val, y_test = get_training_datasets_batch(X,Y, features, test_len=test_len, batch_size=batch_size)
losses = np.full(epochs,np.nan)
optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, self.parameters()),
lr=lr)
criterion = nn.MSELoss()
for e in range(epochs):
print('Starting epoch {}'.format(e))
x_train_data = iter(x_train)
y_train_data = iter(y_train)
x_val_data = iter(x_val)
y_val_data = iter(y_val)
x_train_shape = list(x_train)[0].shape
# predicted = torch.zeros(target_len,batch_size,x_train_shape[2])
# print(predicted.shape)
loss=0
for x_train_in in x_train_data:
optimizer.zero_grad()
x_train_in = Variable(x_train_in)
y_train_in = Variable(next(y_train_data).transpose(0,1))
_, enc_h = self.encoder(x_train_in)
dec_in = x_train_in[:,-1,:]
dec_h = enc_h
if method == 'recursive':
for t in range(target_len):
dec_out, dec_h = self.decoder(dec_in, dec_h)
predicted = dec_out
dec_in = dec_out
loss += criterion(predicted,y_train_in[t])
loss.backward(retain_graph=True)
optimizer.step()
The problem in this case was the loss.backward(retain_graph=True). The code started working after adding the line loss=0. The loss value continues to increase and needs to be reset.
loss.backward()
optimizer.step()
loss=0

torch.save(model.state_dict()) line comes error while model complated train and trying to save it how can i solve?

I'm working on some code about nlp. I want to train and save model but here comes this error. I searched some documentation but i didn't find right solution. How can i solve this problem?
import torch,time
import torch.nn as nn
input_dim = 5
hidden_dim = 10
n_layers = 1
lstm_layer = nn.LSTM(input_dim, hidden_dim, n_layers, batch_first=True)
batch_size = 1
seq_len = 1
inp = torch.randn(batch_size, seq_len, input_dim)
hidden_state = torch.randn(n_layers, batch_size, hidden_dim)
cell_state = torch.randn(n_layers, batch_size, hidden_dim)
hidden = (hidden_state, cell_state)
out, hidden = lstm_layer(inp, hidden)
print("Output shape: ", out.shape)
print("Hidden: ", hidden)
seq_len = 3
inp = torch.randn(batch_size, seq_len, input_dim)
out, hidden = lstm_layer(inp, hidden)
print(out.shape)
# Obtaining the last output
out = out.squeeze()[-1, :]
print(out.shape)
import bz2
from collections import Counter
import re
import nltk
import numpy as np
#nltk.download('punkt')
train_file = bz2.BZ2File('C:/Users/DELL/Dogal-Dil-Isleme/Xml-Files/trwiktionary-20200301-pages-articles-multistream.xml.bz2')
test_file = bz2.BZ2File('C:/Users/DELL/Dogal-Dil-Isleme/Xml-Files/trwikisource-20200601-pages-articles.xml.bz2')
train_file = train_file.readlines()
test_file = test_file.readlines()
num_train = 200
num_test = 50
train_file = [x.decode('utf-8') for x in train_file[:num_train]]
test_file = [x.decode('utf-8') for x in test_file[:num_test]]
train_labels = [0 if x.split(' ')[0] == '__label__1' else 1 for x in train_file]
train_sentences = [x.split(' ', 1)[1][:-1].lower() for x in train_file]
test_labels = [0 if x.split(' ')[0] == '__label__1' else 1 for x in test_file]
test_sentences = [x.split(' ', 1)[1][:-1].lower() for x in test_file]
for i in range(len(train_sentences)):
train_sentences[i] = re.sub('\d','0',train_sentences[i])
for i in range(len(test_sentences)):
test_sentences[i] = re.sub('\d','0',test_sentences[i])
for i in range(len(train_sentences)):
if 'www.' in train_sentences[i] or 'http:' in train_sentences[i] or 'https:' in train_sentences[i] or '.com' in train_sentences[i]:
train_sentences[i] = re.sub(r"([^ ]+(?<=\.[a-z]{3}))", "<url>", train_sentences[i])
for i in range(len(test_sentences)):
if 'www.' in test_sentences[i] or 'http:' in test_sentences[i] or 'https:' in test_sentences[i] or '.com' in test_sentences[i]:
test_sentences[i] = re.sub(r"([^ ]+(?<=\.[a-z]{3}))", "<url>", test_sentences[i])
words = Counter() # Dictionary that will map a word to the number of times it appeared in all the training sentences
for i, sentence in enumerate(train_sentences):
train_sentences[i] = []
for word in nltk.word_tokenize(sentence):
words.update([word.lower()])
train_sentences[i].append(word)
if i%20000 == 0:
print(str((i*100)/num_train) + "% done")
print("100% done")
words = {k:v for k,v in words.items() if v>1}
words = sorted(words, key=words.get, reverse=True)
words = ['_PAD','_UNK'] + words
word2idx = {o:i for i,o in enumerate(words)}
idx2word = {i:o for i,o in enumerate(words)}
for i, sentence in enumerate(train_sentences):
train_sentences[i] = [word2idx[word] if word in word2idx else 0 for word in sentence]
for i, sentence in enumerate(test_sentences):
# For test sentences, we have to tokenize the sentences as well
test_sentences[i] = [word2idx[word.lower()] if word.lower() in word2idx else 0 for word in nltk.word_tokenize(sentence)]
def pad_input(sentences, seq_len):
features = np.zeros((len(sentences), seq_len),dtype=int)
for ii, review in enumerate(sentences):
if len(review) != 0:
features[ii, -len(review):] = np.array(review)[:seq_len]
return features
seq_len = 200 # The length that the sentences will be padded/shortened to
train_sentences = pad_input(train_sentences, seq_len)
test_sentences = pad_input(test_sentences, seq_len)
train_labels = np.array(train_labels)
test_labels = np.array(test_labels)
split_frac = 0.5 # 50% validation, 50% test
split_id = int(split_frac * len(test_sentences))
val_sentences, test_sentences = test_sentences[:split_id], test_sentences[split_id:]
val_labels, test_labels = test_labels[:split_id], test_labels[split_id:]
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
train_data = TensorDataset(torch.from_numpy(train_sentences), torch.from_numpy(train_labels))
val_data = TensorDataset(torch.from_numpy(val_sentences), torch.from_numpy(val_labels))
test_data = TensorDataset(torch.from_numpy(test_sentences), torch.from_numpy(test_labels))
batch_size = 200
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_data, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size)
# torch.cuda.is_available() checks and returns a Boolean True if a GPU is available, else it'll return False
is_cuda = torch.cuda.is_available()
# If we have a GPU available, we'll set our device to GPU. We'll use this device variable later in our code.
if is_cuda:
device = torch.device("cuda")
else:
device = torch.device("cpu")
class SentimentNet(nn.Module):
def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim, n_layers, drop_prob=0.5):
super(SentimentNet, self).__init__()
self.output_size = output_size
self.n_layers = n_layers
self.hidden_dim = hidden_dim
self.embedding = nn.Embedding(vocab_size, embedding_dim)
self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, dropout=drop_prob, batch_first=True)
self.dropout = nn.Dropout(drop_prob)
self.fc = nn.Linear(hidden_dim, output_size)
self.sigmoid = nn.Sigmoid()
def forward(self, x, hidden):
batch_size = x.size(0)
x = x.long()
embeds = self.embedding(x)
lstm_out, hidden = self.lstm(embeds, hidden)
lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)
out = self.dropout(lstm_out)
out = self.fc(out)
out = self.sigmoid(out)
out = out.view(batch_size, -1)
out = out[:,-1]
return out, hidden
def init_hidden(self, batch_size):
weight = next(self.parameters()).data
hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device),
weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device))
return hidden
vocab_size = len(word2idx) + 1
output_size = 1
embedding_dim = 400
hidden_dim = 512
n_layers = 2
model = SentimentNet(vocab_size, output_size, embedding_dim, hidden_dim, n_layers)
model.to(device)
lr=0.005
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
epochs = 2
counter = 0
print_every = 1000
clip = 5
valid_loss_min = np.Inf
model.train()
for i in range(epochs):
h = model.init_hidden(batch_size)
for inputs, labels in train_loader:
counter += 1
h = tuple([e.data for e in h])
inputs, labels = inputs.to(device), labels.to(device)
model.zero_grad()
output, h = model(inputs, h)
loss = criterion(output.squeeze(), labels.float())
loss.backward()
nn.utils.clip_grad_norm_(model.parameters(), clip)
optimizer.step()
if counter%print_every == 0:
val_h = model.init_hidden(batch_size)
val_losses = []
model.eval()
for inp, lab in val_loader:
val_h = tuple([each.data for each in val_h])
inp, lab = inp.to(device), lab.to(device)
out, val_h = model(inp, val_h)
val_loss = criterion(out.squeeze(), lab.float())
val_losses.append(val_loss.item())
model.train()
print("Epoch: {}/{}...".format(i+1, epochs),
"Step: {}...".format(counter),
"Loss: {:.6f}...".format(loss.item()),
"Val Loss: {:.6f}".format(np.mean(val_losses)))
if np.mean(val_losses) <= valid_loss_min:
torch.save(model.state_dict(), 'C:/Users/DELL/Dogal-Dil-Isleme/Models/state_dict.pt')
print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(valid_loss_min,np.mean(val_losses)))
valid_loss_min = np.mean(val_losses)
time.sleep(1)
# Loading the best model
model.load_state_dict(torch.load('C:/Users/DELL/Dogal-Dil-Isleme/Models/state_dict.pt'))
test_losses = []
num_correct = 0
h = model.init_hidden(batch_size)
model.eval()
for inputs, labels in test_loader:
h = tuple([each.data for each in h])
inputs, labels = inputs.to(device), labels.to(device)
output, h = model(inputs, h)
test_loss = criterion(output.squeeze(), labels.float())
test_losses.append(test_loss.item())
pred = torch.round(output.squeeze()) # Rounds the output to 0/1
correct_tensor = pred.eq(labels.float().view_as(pred))
correct = np.squeeze(correct_tensor.cpu().numpy())
num_correct += np.sum(correct)
print("Test loss: {:.3f}".format(np.mean(test_losses)))
test_acc = num_correct/len(test_loader.dataset)
print("Test accuracy: {:.3f}%".format(test_acc*100))
i tried a create new folder and change path but all the ways comes error :)
i read pytorch documentation and change recommended code but error still came.
i will share some link for your reading about this issue.
same issue
pytorch documentation
how can i fix or is there any alternative way to save model?
Try changing it to: model.load_state_dict(torch.load('C:/Users/DELL/Dogal-Dil-Isleme/Models/state_dict'))

pytorch, Using nn.DataParallel in LSTM

/pytorch/aten/src/ATen/native/cudnn/RNN.cpp:1266: UserWarning: RNN module weights are not part of single contiguous chunk of memory.
This means they need to be compacted at every call, possibly greatly increasing memory usage. To compact weights again call flatten_parameters().
Hello. I am using pytorch.
I am trying to use DataParallel function in pytorch,
but the model is LSTM. I'm warned to flatten the model again,
but I don't know when and where to flatten.
Can you let me know?
This is my model
import torch.nn as nn
from torchvision import models
class ConvLstm(nn.Module):
def __init__(self, latent_dim, model, hidden_size, lstm_layers, bidirectional, n_class):
super(ConvLstm, self).__init__()
self.conv_model = Pretrained_conv(latent_dim, model)
self.Lstm = Lstm(latent_dim, hidden_size, lstm_layers, bidirectional)
self.output_layer = nn.Sequential(
nn.Linear(2 * hidden_size if bidirectional ==
True else hidden_size, n_class),
nn.Softmax(dim=-1)
)
def forward(self, x):
batch_size, timesteps, channel_x, h_x, w_x = x.shape
conv_input = x.view(batch_size * timesteps, channel_x, h_x, w_x)
conv_output = self.conv_model(conv_input)
lstm_input = conv_output.view(batch_size, timesteps, -1)
lstm_output = self.Lstm(lstm_input)
lstm_output = lstm_output[:, -1, :]
output = self.output_layer(lstm_output)
return output
class Pretrained_conv(nn.Module):
def __init__(self, latent_dim, model):
if model == 'resnet152':
super(Pretrained_conv, self).__init__()
self.conv_model = models.resnet152(pretrained=True)
# ====== freezing all of the layers ======
for param in self.conv_model.parameters():
param.requires_grad = False
# ====== changing the last FC layer to an output with the size we need. this layer is un freezed ======
self.conv_model.fc = nn.Linear(
self.conv_model.fc.in_features, latent_dim)
def forward(self, x):
return self.conv_model(x)
class Lstm(nn.Module):
def __init__(self, latent_dim, hidden_size, lstm_layers, bidirectional):
super(Lstm, self).__init__()
self.Lstm = nn.LSTM(latent_dim, hidden_size=hidden_size,
num_layers=lstm_layers, batch_first=True, bidirectional=bidirectional)
self.hidden_state = None
def reset_hidden_state(self):
self.hidden_state = None
def forward(self, x):
output, self.hidden_state = self.Lstm(x, self.hidden_state)
return output
Enter LSTM and execute the following code.
def foward_step(model, images, labels, criterion, mode=''):
model.module.Lstm.reset_hidden_state()
if mode == 'test':
with torch.no_grad():
output = model(images)
else:
output = model(images)
loss = criterion(output, labels)
# Accuracy calculation
predicted_labels = output.detach().argmax(dim=1)
acc = (predicted_labels == labels).cpu().numpy().sum()
return loss, acc, predicted_labels.cpu()
This is main
model = nn.DataParallel(model, device_ids=[0,1,2,3]).cuda()

Target size different to input size - LSTM

I am relatively new to Pytorch and have been training an LSTM model. Any feedback on code in general would be appreciated.
When I train the model I receive the following warning
UserWarning: Using a target size (torch.Size([4050, 1, 1])) that is
different to the input size (torch.Size([1])). This will likely lead
to incorrect results due to broadcasting. Please ensure they have the
same size.
Can anyone help me with this problem?
My goal is to train a model on a time series containing multiple features. Please see the code below:
LSTM:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from Tools.data_loader import CreateDataset
class LSTMModel(nn.Module):
def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
super(LSTMModel, self).__init__()
self.input_dim = input_dim
self.hidden_dim = hidden_dim
self.output_dim = output_dim
self.num_layers = num_layers
self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers)
self.forecast = nn.Linear(hidden_dim, output_dim)
def forward(self, x):
batch_size = 1
h0 = torch.zeros(self.num_layers, batch_size, self.hidden_dim).requires_grad_()
c0 = torch.zeros(self.num_layers, batch_size, self.hidden_dim).requires_grad_()
out, (hn,cn) = self.lstm(x, (h0.detach(), c0.detach()))
y_pred = self.forecast(out[-1].view(batch_size,-1))
return y_pred.view(-1)
dataset = CreateDataset('Data/Bloomberg_PV_weather.csv', 'Datetime', 0.8, 'Power') #PLACEHOLDER
input_dim = dataset.num_features
hidden_dim = 50
num_layers = 1
output_dim = 1
num_epochs = 50
X_train = dataset.X_train
y_train = dataset.y_train
X_test = dataset.X_test
y_test = dataset.y_test
model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim)
criterion = nn.MSELoss()
optimizer=torch.optim.Adam(model.parameters())
hist = np.zeros(num_epochs)
for epoch in range(num_epochs):
optimizer.zero_grad()
output=model(X_train)
loss = criterion(output,y_train)
if epoch % 100 == 0:
print('Epoch ', epoch, 'Loss: ',loss.item())
hist[epoch] = loss.item()
loss.backward()
optimizer.step()
Data Loader:
'''
Loads data and creates train and test sets
'''
import torch
import math
import pandas as pd
class CreateDataset():
def __init__(self, file, datetime_col, train_proportion, target):
self.file = file
self.datetime_col = datetime_col
self.train_proportion = train_proportion
self.target = target
self.data = self.load_data()
self.train_set, self.test_set = self.split_data()
self.X_train, self.y_train,self.X_test, self.y_test, self.num_features = self.reshape_data()
def load_data(self):
'''
Reads in data
'''
data = pd.read_csv(self.file, header=0)
data.drop(columns=self.datetime_col, inplace=True)
return data
def split_data(self):
'''
Creates test and train sets
'''
train_length = math.ceil(len(self.data)*self.train_proportion)
train_set = self.data[0:train_length]
test_set = self.data[train_length:]
return train_set, test_set
def reshape_data(self):
'''
Splits datasets into X and y sets and reshapes into 3D tensor
'''
num_features = (len(self.test_set.columns)-1)
y_train = torch.tensor(self.train_set[self.target].values).float()
X_train = torch.tensor(self.train_set.drop(columns=self.target).values).float()
y_test = torch.tensor(self.test_set[self.target].values).float()
X_test = torch.tensor(self.test_set.drop(columns=self.target).values).float()
X_train = X_train.view(-1,1,num_features)
y_train = y_train.view(-1,1,1)
X_test = X_test.view(-1,1,num_features)
y_test = y_test.view(-1,1,1)
return X_train, y_train, X_test, y_test, num_features

Categories

Resources