I want to split my dataset into test and validation datasets as my model might be suffering from overfitting. Here's my current architecture:
input_sentences = []
output_sentences = []
output_sentences_inputs = []
count = 0
for line in open(r'/content/drive/My Drive/TEMPPP/123.txt', encoding="utf-8"):
    count += 1
    if count > NUM_SENTENCES:
        break
    if '\t' not in line:
        continue
    input_sentence, output = line.rstrip().split('\t')
    output_sentence = output + ' <eos>'
    output_sentence_input = '<sos> ' + output
    input_sentences.append(input_sentence)
    output_sentences.append(output_sentence)
    output_sentences_inputs.append(output_sentence_input)
input_tokenizer = Tokenizer(num_words=MAX_NUM_WORDS)
input_tokenizer.fit_on_texts(input_sentences)
input_integer_seq = input_tokenizer.texts_to_sequences(input_sentences)
word2idx_inputs = input_tokenizer.word_index
max_input_len = max(len(sen) for sen in input_integer_seq)
output_tokenizer = Tokenizer(num_words=MAX_NUM_WORDS, filters='')
output_tokenizer.fit_on_texts(output_sentences + output_sentences_inputs)
output_integer_seq = output_tokenizer.texts_to_sequences(output_sentences)
output_input_integer_seq = output_tokenizer.texts_to_sequences(output_sentences_inputs)
word2idx_outputs = output_tokenizer.word_index
num_words_output = len(word2idx_outputs) + 1
max_out_len = max(len(sen) for sen in output_integer_seq)
encoder_input_sequences = pad_sequences(input_integer_seq, maxlen=max_input_len)
decoder_input_sequences = pad_sequences(output_input_integer_seq, maxlen=max_out_len, padding='post')
import numpy as np
read_dictionary = np.load('/content/drive/My Drive/TEMPPP/hinvec.npy',allow_pickle='TRUE').item()
num_words = min(MAX_NUM_WORDS, len(word2idx_inputs) + 1)
embedding_matrix = np.zeros((num_words, EMBEDDING_SIZE))
for word, index in word2idx_inputs.items():
    embedding_vector = read_dictionary.get(word)
    if embedding_vector is not None:
        embedding_matrix[index] = embedding_vector
embedding_layer = Embedding(num_words, EMBEDDING_SIZE, weights=[embedding_matrix], input_length=max_input_len)
decoder_targets_one_hot = np.zeros((
    len(input_sentences),
    max_out_len,
    num_words_output
), dtype='float32')
decoder_output_sequences = pad_sequences(output_integer_seq, maxlen=max_out_len, padding='post')
for i, d in enumerate(decoder_output_sequences):
    for t, word in enumerate(d):
        decoder_targets_one_hot[i, t, word] = 1
encoder_inputs_placeholder = Input(shape=(max_input_len,))
x = embedding_layer(encoder_inputs_placeholder)
encoder = LSTM(LSTM_NODES, return_state=True)
encoder_outputs, h, c = encoder(x)
encoder_states = [h, c]
decoder_inputs_placeholder = Input(shape=(max_out_len,))
decoder_embedding = Embedding(num_words_output, LSTM_NODES)
decoder_inputs_x = decoder_embedding(decoder_inputs_placeholder)
decoder_lstm = LSTM(LSTM_NODES, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs_x, initial_state=encoder_states)
decoder_dense = Dense(num_words_output, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)
import tensorflow as tf
starter_learning_rate = 0.1
end_learning_rate = 0.01
decay_steps = 2000
learning_rate_fn = tf.keras.optimizers.schedules.PolynomialDecay(
    starter_learning_rate,
    decay_steps,
    end_learning_rate,
    power=0.5)
opt = tf.keras.optimizers.Adam(learning_rate=learning_rate_fn, epsilon=1e-03, clipvalue=0.5)
model = Model([encoder_inputs_placeholder,
               decoder_inputs_placeholder],
              decoder_outputs)
model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
history = model.fit(
    [encoder_input_sequences, decoder_input_sequences],
    decoder_targets_one_hot,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=0.1,
)
After reading the dataset, the sentences are already stored in input_sentences and output_sentences, so I thought I could pass them directly as X and y like this:
from sklearn.model_selection import train_test_split
X=input_sentences
y=output_sentences
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
This way I get my Hindi sentences in X and my English sentences in y. Now I'm really confused: how do I implement this split in my model?
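For example, here is a minimal sketch of what I had in mind: splitting the preprocessed arrays rather than the raw strings, reusing the variables from my code above (I'm not sure it's the right approach):
from sklearn.model_selection import train_test_split

# Split the padded integer sequences and the one-hot targets together,
# so the encoder inputs, decoder inputs, and targets stay aligned.
(enc_train, enc_test,
 dec_train, dec_test,
 tgt_train, tgt_test) = train_test_split(
    encoder_input_sequences,
    decoder_input_sequences,
    decoder_targets_one_hot,
    test_size=0.33,
    random_state=42,
)

# Train on the training split only; validation_split still carves a
# validation set out of the remaining training data.
history = model.fit(
    [enc_train, dec_train],
    tgt_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=0.1,
)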
I'm working on some NLP code. I want to train and save the model, but I get an error. I searched the documentation but didn't find the right solution. How can I solve this problem?
import torch,time
import torch.nn as nn
input_dim = 5
hidden_dim = 10
n_layers = 1
lstm_layer = nn.LSTM(input_dim, hidden_dim, n_layers, batch_first=True)
batch_size = 1
seq_len = 1
inp = torch.randn(batch_size, seq_len, input_dim)
hidden_state = torch.randn(n_layers, batch_size, hidden_dim)
cell_state = torch.randn(n_layers, batch_size, hidden_dim)
hidden = (hidden_state, cell_state)
out, hidden = lstm_layer(inp, hidden)
print("Output shape: ", out.shape)
print("Hidden: ", hidden)
seq_len = 3
inp = torch.randn(batch_size, seq_len, input_dim)
out, hidden = lstm_layer(inp, hidden)
print(out.shape)
# Obtaining the last output
out = out.squeeze()[-1, :]
print(out.shape)
import bz2
from collections import Counter
import re
import nltk
import numpy as np
#nltk.download('punkt')
train_file = bz2.BZ2File('C:/Users/DELL/Dogal-Dil-Isleme/Xml-Files/trwiktionary-20200301-pages-articles-multistream.xml.bz2')
test_file = bz2.BZ2File('C:/Users/DELL/Dogal-Dil-Isleme/Xml-Files/trwikisource-20200601-pages-articles.xml.bz2')
train_file = train_file.readlines()
test_file = test_file.readlines()
num_train = 200
num_test = 50
train_file = [x.decode('utf-8') for x in train_file[:num_train]]
test_file = [x.decode('utf-8') for x in test_file[:num_test]]
train_labels = [0 if x.split(' ')[0] == '__label__1' else 1 for x in train_file]
train_sentences = [x.split(' ', 1)[1][:-1].lower() for x in train_file]
test_labels = [0 if x.split(' ')[0] == '__label__1' else 1 for x in test_file]
test_sentences = [x.split(' ', 1)[1][:-1].lower() for x in test_file]
for i in range(len(train_sentences)):
    train_sentences[i] = re.sub(r'\d', '0', train_sentences[i])
for i in range(len(test_sentences)):
    test_sentences[i] = re.sub(r'\d', '0', test_sentences[i])
for i in range(len(train_sentences)):
    if 'www.' in train_sentences[i] or 'http:' in train_sentences[i] or 'https:' in train_sentences[i] or '.com' in train_sentences[i]:
        train_sentences[i] = re.sub(r"([^ ]+(?<=\.[a-z]{3}))", "<url>", train_sentences[i])
for i in range(len(test_sentences)):
    if 'www.' in test_sentences[i] or 'http:' in test_sentences[i] or 'https:' in test_sentences[i] or '.com' in test_sentences[i]:
        test_sentences[i] = re.sub(r"([^ ]+(?<=\.[a-z]{3}))", "<url>", test_sentences[i])
words = Counter() # Dictionary that will map a word to the number of times it appeared in all the training sentences
for i, sentence in enumerate(train_sentences):
    train_sentences[i] = []
    for word in nltk.word_tokenize(sentence):
        words.update([word.lower()])
        train_sentences[i].append(word)
    if i%20000 == 0:
        print(str((i*100)/num_train) + "% done")
print("100% done")
words = {k:v for k,v in words.items() if v>1}
words = sorted(words, key=words.get, reverse=True)
words = ['_PAD','_UNK'] + words
word2idx = {o:i for i,o in enumerate(words)}
idx2word = {i:o for i,o in enumerate(words)}
for i, sentence in enumerate(train_sentences):
    train_sentences[i] = [word2idx[word] if word in word2idx else 0 for word in sentence]
for i, sentence in enumerate(test_sentences):
    # For test sentences, we have to tokenize the sentences as well
    test_sentences[i] = [word2idx[word.lower()] if word.lower() in word2idx else 0 for word in nltk.word_tokenize(sentence)]
def pad_input(sentences, seq_len):
    features = np.zeros((len(sentences), seq_len), dtype=int)
    for ii, review in enumerate(sentences):
        if len(review) != 0:
            features[ii, -len(review):] = np.array(review)[:seq_len]
    return features
seq_len = 200 # The length that the sentences will be padded/shortened to
train_sentences = pad_input(train_sentences, seq_len)
test_sentences = pad_input(test_sentences, seq_len)
train_labels = np.array(train_labels)
test_labels = np.array(test_labels)
split_frac = 0.5 # 50% validation, 50% test
split_id = int(split_frac * len(test_sentences))
val_sentences, test_sentences = test_sentences[:split_id], test_sentences[split_id:]
val_labels, test_labels = test_labels[:split_id], test_labels[split_id:]
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
train_data = TensorDataset(torch.from_numpy(train_sentences), torch.from_numpy(train_labels))
val_data = TensorDataset(torch.from_numpy(val_sentences), torch.from_numpy(val_labels))
test_data = TensorDataset(torch.from_numpy(test_sentences), torch.from_numpy(test_labels))
batch_size = 200
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_data, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size)
# torch.cuda.is_available() checks and returns a Boolean True if a GPU is available, else it'll return False
is_cuda = torch.cuda.is_available()
# If we have a GPU available, we'll set our device to GPU. We'll use this device variable later in our code.
if is_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
class SentimentNet(nn.Module):
    def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim, n_layers, drop_prob=0.5):
        super(SentimentNet, self).__init__()
        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, dropout=drop_prob, batch_first=True)
        self.dropout = nn.Dropout(drop_prob)
        self.fc = nn.Linear(hidden_dim, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x, hidden):
        batch_size = x.size(0)
        x = x.long()
        embeds = self.embedding(x)
        lstm_out, hidden = self.lstm(embeds, hidden)
        lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)
        out = self.dropout(lstm_out)
        out = self.fc(out)
        out = self.sigmoid(out)
        out = out.view(batch_size, -1)
        out = out[:, -1]
        return out, hidden

    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device),
                  weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device))
        return hidden
vocab_size = len(word2idx) + 1
output_size = 1
embedding_dim = 400
hidden_dim = 512
n_layers = 2
model = SentimentNet(vocab_size, output_size, embedding_dim, hidden_dim, n_layers)
model.to(device)
lr=0.005
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
epochs = 2
counter = 0
print_every = 1000
clip = 5
valid_loss_min = np.Inf
model.train()
for i in range(epochs):
    h = model.init_hidden(batch_size)
    for inputs, labels in train_loader:
        counter += 1
        h = tuple([e.data for e in h])
        inputs, labels = inputs.to(device), labels.to(device)
        model.zero_grad()
        output, h = model(inputs, h)
        loss = criterion(output.squeeze(), labels.float())
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        if counter % print_every == 0:
            val_h = model.init_hidden(batch_size)
            val_losses = []
            model.eval()
            for inp, lab in val_loader:
                val_h = tuple([each.data for each in val_h])
                inp, lab = inp.to(device), lab.to(device)
                out, val_h = model(inp, val_h)
                val_loss = criterion(out.squeeze(), lab.float())
                val_losses.append(val_loss.item())
            model.train()
            print("Epoch: {}/{}...".format(i+1, epochs),
                  "Step: {}...".format(counter),
                  "Loss: {:.6f}...".format(loss.item()),
                  "Val Loss: {:.6f}".format(np.mean(val_losses)))
            if np.mean(val_losses) <= valid_loss_min:
                torch.save(model.state_dict(), 'C:/Users/DELL/Dogal-Dil-Isleme/Models/state_dict.pt')
                print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(valid_loss_min, np.mean(val_losses)))
                valid_loss_min = np.mean(val_losses)
    time.sleep(1)
# Loading the best model
model.load_state_dict(torch.load('C:/Users/DELL/Dogal-Dil-Isleme/Models/state_dict.pt'))
test_losses = []
num_correct = 0
h = model.init_hidden(batch_size)
model.eval()
for inputs, labels in test_loader:
    h = tuple([each.data for each in h])
    inputs, labels = inputs.to(device), labels.to(device)
    output, h = model(inputs, h)
    test_loss = criterion(output.squeeze(), labels.float())
    test_losses.append(test_loss.item())
    pred = torch.round(output.squeeze())  # Rounds the output to 0/1
    correct_tensor = pred.eq(labels.float().view_as(pred))
    correct = np.squeeze(correct_tensor.cpu().numpy())
    num_correct += np.sum(correct)
print("Test loss: {:.3f}".format(np.mean(test_losses)))
test_acc = num_correct/len(test_loader.dataset)
print("Test accuracy: {:.3f}%".format(test_acc*100))
I tried creating a new folder and changing the path, but every way I tried produced an error :)
I read the PyTorch documentation and changed the code as recommended, but the error still came.
I will share some links for reading about this issue:
same issue
pytorch documentation
How can I fix this, or is there an alternative way to save the model?
Try changing it to: model.load_state_dict(torch.load('C:/Users/DELL/Dogal-Dil-Isleme/Models/state_dict'))
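If the error is that the target folder does not exist, a minimal sketch of a safer save/load pattern (assuming the paths from the question) is to create the directory first:
import os
import torch

save_dir = 'C:/Users/DELL/Dogal-Dil-Isleme/Models'
os.makedirs(save_dir, exist_ok=True)  # create the folder if it is missing

# Save only the weights...
torch.save(model.state_dict(), os.path.join(save_dir, 'state_dict.pt'))

# ...and load them back into a model built with the same arguments.
model.load_state_dict(torch.load(os.path.join(save_dir, 'state_dict.pt')))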
I'm learning text classification with TensorFlow, using movie reviews as data, but I got stuck: the output prediction I get is different from the label (not rounded, not binary).
CODE
predict = model.predict([test_review])
print("Prediction: " + str(predict[0])) # [1.8203685e-19]
print("Actual: " + str(test_labels[0])) # 0
The expected output should be:
Prediction: [0.]
Actual: 0
What the output is giving:
Prediction: [1.8203685e-19]
Actual: 0
The output prediction should be 0 or 1, representing whether the review was good or not.
FULL CODE
import tensorflow as tf
from tensorflow import keras
import numpy as np
data = keras.datasets.imdb
(train_data, train_labels), (test_data, test_labels) = data.load_data(num_words = 10000)
word_index = data.get_word_index()
word_index = {k:(v + 3) for k, v in word_index.items()}
word_index['<PAD>'] = 0
word_index['<START>'] = 1
word_index['<UNK>'] = 2
word_index['<UNUSED>'] = 3
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
train_data = keras.preprocessing.sequence.pad_sequences(train_data, value = word_index['<PAD>'], padding = 'post', maxlen = 256)
test_data = keras.preprocessing.sequence.pad_sequences(test_data, value = word_index['<PAD>'], padding = 'post', maxlen = 256)
def decode_review(text):
    """Decode the training and testing data into readable words."""
    return ' '.join([reverse_word_index.get(i, '?') for i in text])
print("\n")
print(decode_review(test_data[0]))
model = keras.Sequential()
model.add(keras.layers.Embedding(10000, 16))
model.add(keras.layers.GlobalAveragePooling1D())
model.add(keras.layers.Dense(16, activation = 'relu'))
model.add(keras.layers.Dense(1, activation = 'sigmoid'))
model.summary()
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
x_val = train_data[:10000]
x_train = train_data[10000:]
y_val = train_labels[:10000]
y_train = train_labels[10000:]
fitModel = model.fit(x_train, y_train, epochs = 40,
                     batch_size = 512,
                     validation_data = (x_val, y_val),
                     verbose = 1)
results = model.evaluate(test_data, test_labels)
test_review = test_data[0]
predict = model.predict([test_review])
print("Review: ")
print(decode_review(test_review))
print("Prediction: " + str(predict[0])) # [1.8203685e-19]
print("Actual: " + str(test_labels[0]))
print("\n[loss, accuracy]: ", results)
Replace the predict method with the predict_classes method:
model.predict_classes([test_review])
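Note that predict_classes was removed from newer TensorFlow/Keras releases. If it is not available in your version, an equivalent for a single sigmoid output is to threshold the predicted probability yourself, for example:
import numpy as np

# model.predict returns a probability; threshold it at 0.5 to get 0/1.
pred_prob = model.predict(np.expand_dims(test_review, axis=0))
pred_class = (pred_prob > 0.5).astype(int)
print("Prediction: " + str(pred_class[0]))  # [0]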
The model is as below:
inputs_1 = keras.Input(shape=(10081,1))
layer1 = Conv1D(64,14)(inputs_1)
layer2 = layers.MaxPool1D(5)(layer1)
layer3 = Conv1D(64, 14)(layer2)
layer4 = layers.GlobalMaxPooling1D()(layer3)
inputs_2 = keras.Input(shape=(85,))
layer5 = layers.concatenate([layer4, inputs_2])
layer6 = Dense(128, activation='relu')(layer5)
layer7 = Dense(2, activation='softmax')(layer6)
model_2 = keras.models.Model(inputs=[inputs_1, inputs_2], outputs=[layer7])
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:,0:10166], df[['Result_cat','Result_cat1']].values, test_size=0.2)
X_train = X_train.to_numpy()
X_train = X_train.reshape([X_train.shape[0], X_train.shape[1], 1])
X_train_1 = X_train[:,0:10081,:]
X_train_2 = X_train[:,10081:10166,:].reshape(736,85)
X_test = X_test.to_numpy()
X_test = X_test.reshape([X_test.shape[0], X_test.shape[1], 1])
X_test_1 = X_test[:,0:10081,:]
X_test_2 = X_test[:,10081:10166,:].reshape(185,85)
adam = keras.optimizers.Adam(lr = 0.0005)
model_2.compile(loss = 'categorical_crossentropy', optimizer = adam, metrics = ['acc'])
history = model_2.fit([X_train_1,X_train_2], y_train, epochs = 120, batch_size = 256, validation_split = 0.2, callbacks = [keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)])
Questions:
1) The data is 921 rows x 10166 columns. Each row is an observation (the first 10080 columns are a time series; the remaining columns are other statistical features). According to the model, is the input data split into inputs_1 and inputs_2 randomly?
2) I am thinking about doing k-fold cross-validation and splitting the input data into inputs_1 and inputs_2. What is a good way to do this? Thanks
By splitting only the indexes:
from sklearn.model_selection import KFold
import numpy as np

num_folds = 5
kfold = KFold(n_splits=num_folds, shuffle=False)
ID_Inp = np.array(range(nSamples))
ID_Out = np.array(range(nSamples))
Inputs = [Input1, Input2]
for IDs_Train, IDs_Test in kfold.split(ID_Inp, ID_Out):
    Fold_Train_Input1, Fold_Train_Input2 = Input1[IDs_Train], Input2[IDs_Train]
    Fold_Train_OutPut = Output[IDs_Train]
    Fold_Test_Input1, Fold_Test_Input2 = Input1[IDs_Test], Input2[IDs_Test]
    Fold_Test_OutPut = Output[IDs_Test]
    #################### train on this fold here (see sketch below)
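As a sketch of what could go inside the loop in place of the #### marker: rebuild and train the model on each fold's training split and evaluate on the held-out fold. Here build_fresh_model is a hypothetical helper that reconstructs model_2 from the question's code so every fold starts from new weights:
    fold_model = build_fresh_model()  # hypothetical: rebuilds model_2 from scratch
    fold_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
    fold_model.fit([Fold_Train_Input1, Fold_Train_Input2], Fold_Train_OutPut,
                   epochs=120, batch_size=256)
    scores = fold_model.evaluate([Fold_Test_Input1, Fold_Test_Input2], Fold_Test_OutPut)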
I have the following code and sample, which works fine and does exactly what I want:
import numpy as np
import pandas as pd
import sklearn
import sklearn.preprocessing
import datetime
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector
valid_set_size_percentage = 3
test_set_size_percentage = 3
seq_len = 5 # choose sequence length
df = pd.read_csv("Test.csv", encoding = 'utf-16',sep=',',index_col = 0)
df.head()
def normalize_data(df):
    cols = list(df.columns.values)  # was df_stock; use the frame passed in
    min_max_scaler = sklearn.preprocessing.MinMaxScaler()
    df = pd.DataFrame(min_max_scaler.fit_transform(df.values))
    df.columns = cols
    return df
def load_data(stock, seq_len):
    data_raw = stock.as_matrix()  # convert to numpy array
    data = []
    print(data_raw.shape)
    for index in range(len(data_raw) - seq_len):
        data.append(data_raw[index: index + seq_len])
    data = np.array(data)
    valid_set_size = int(np.round(valid_set_size_percentage/100*data.shape[0]))
    test_set_size = int(np.round(test_set_size_percentage/100*data.shape[0]))
    train_set_size = data.shape[0] - (valid_set_size + test_set_size)
    x_train = data[:train_set_size, :-1, :]
    y_train = data[:train_set_size, -1, :4]
    x_valid = data[train_set_size:train_set_size+valid_set_size, :-1, :]
    y_valid = data[train_set_size:train_set_size+valid_set_size, -1, :4]
    x_test = data[train_set_size+valid_set_size:, :-1, :]
    y_test = data[train_set_size+valid_set_size:, -1, :4]
    return [x_train, y_train, x_valid, y_valid, x_test, y_test]
df_stock = df.copy()
cols = list(df_stock.columns.values)
print('df_stock.columns.values = ', cols)
df_stock_norm = df_stock.copy()
df_stock_norm = normalize_data(df_stock_norm)
x_train, y_train, x_valid, y_valid, x_test, y_test = load_data(df_stock_norm, seq_len)
print(y_train[:2])
print('x_train.shape = ',x_train.shape)
print('y_train.shape = ', y_train.shape)
print('Inputs = ',x_train.shape[2])
print('Outputs = ', y_train.shape[1])
print('x_valid.shape = ',x_valid.shape)
print('y_valid.shape = ', y_valid.shape)
print('x_test.shape = ', x_test.shape)
print('y_test.shape = ',y_test.shape)
index_in_epoch = 0
perm_array = np.arange(x_train.shape[0])
np.random.shuffle(perm_array)
def get_next_batch(batch_size):
    global index_in_epoch, x_train, perm_array
    if index_in_epoch > x_train.shape[0]:
        start = 0  # start next epoch
        index_in_epoch = 0  # batch_size
    start = index_in_epoch
    index_in_epoch += batch_size
    end = index_in_epoch
    return x_train[perm_array[start:end]], y_train[perm_array[start:end]]
n_steps = seq_len -1
n_inputs = x_train.shape[2]
n_neurons = 100
n_outputs = y_train.shape[-1]
n_layers = 2
learning_rate = 0.001
batch_size =10
n_epochs = 100
train_set_size = x_train.shape[0]
test_set_size = x_test.shape[0]
tf.reset_default_graph()
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None,n_outputs])
layers = [tf.contrib.rnn.LSTMCell(num_units=n_neurons,
                                  activation=tf.nn.leaky_relu, use_peepholes=True)
          for layer in range(n_layers)]
multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers)
rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)
stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])
stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])
outputs = outputs[:,n_steps-1,:] # keep only last output of sequence
loss = tf.reduce_mean(tf.squared_difference(outputs, y)) # loss function = mean squared error
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for iteration in range(int(n_epochs*train_set_size/batch_size)):
        x_batch, y_batch = get_next_batch(batch_size)  # fetch the next training batch
        sess.run(training_op, feed_dict={X: x_batch, y: y_batch})
        if iteration % int(1*train_set_size/batch_size) == 0:
            mse_train = loss.eval(feed_dict={X: x_train, y: y_train})
            mse_valid = loss.eval(feed_dict={X: x_valid, y: y_valid})
            mse_test = loss.eval(feed_dict={X: x_test, y: y_test})
            print('%.2f epochs: MSE train/valid/test = %.3f/%.3f/%.3f' % (
                iteration*batch_size/train_set_size, mse_train, mse_valid, mse_test))
            try:
                save_path = saver.save(sess, "modelfile\\model"+str(iteration)+".ckpt")
            except Exception as e:
                print(e)
                if not os.path.exists("modelfile\\"):
                    os.makedirs("modelfile\\")
                save_path = saver.save(sess, "modelfile\\model"+str(iteration)+".ckpt")
The following is a sample of what I am trying to execute: Same data (please click and see).
I want to add the TensorBoard Projector to my code, but I could not understand how to do it. I want to visualize the different inputs I am giving for training. I am supplying the following columns and trying to predict the OHLC values.
'o', 'h', 'l', 'c', 'rel1', 'rel2', 'rel3', 'rel4', 'rel5', 'rel6', 'rel7', 'rel8'
I want to visualize the above columns in the Projector to see how they relate to each other in producing the output.
Please let me know what I can do to achieve this.
EDITED:
I have tried something as follows but cannot see the projector tab:
tf.reset_default_graph()
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None,n_outputs])
symbols = tf.placeholder(tf.int32, [None, 1], name='stock_labels')
embed_matrix = tf.Variable(
    tf.random_uniform([1, n_inputs], 0.0, 1.0),
    name="embed_matrix"
)
stacked_symbols = tf.tile(symbols, [batch_size,n_steps], name='stacked_stock_labels')
stacked_embeds = tf.nn.embedding_lookup(embed_matrix, stacked_symbols)
# stacked_embeds = tf.nn.embedding_lookup(embed_matrix)
# After concat, inputs.shape = (batch_size, num_steps, input_size + embed_size)
inputs_with_embed = tf.concat([X, stacked_embeds], axis=2, name="inputs_with_embed")
embed_matrix_summ = tf.summary.histogram("embed_matrix", embed_matrix)
And edited the following lines in the session code:
merged_sum = tf.summary.merge_all()
global_step = 0
# Set up the logs folder
writer = tf.summary.FileWriter('logs')
writer.add_graph(sess.graph)
projector_config = projector.ProjectorConfig()
# You can add multiple embeddings. Here we add only one.
added_embed = projector_config.embeddings.add()
added_embed.tensor_name = embed_matrix.name
# Link this tensor to its metadata file (e.g. labels).
shutil.copyfile("logs\\metadata.tsv",
                "logs\\metadata1.tsv")
added_embed.metadata_path = "metadata.tsv"
# The next line writes a projector_config.pbtxt in the LOG_DIR. TensorBoard will
# read this file during startup.
projector.visualize_embeddings(writer, projector_config)
sess.run(tf.global_variables_initializer())
if iteration % int(1*train_set_size/batch_size) == 0:
    global_step += 1
    mse_train = loss.eval(feed_dict={X: x_train, y: y_train})
    mse_valid = loss.eval(feed_dict={X: x_valid, y: y_valid})
    mse_test = loss.eval(feed_dict={X: x_test, y: y_test})
    _, train_merge = sess.run([outputs, merged_sum], feed_dict={X: x_train, y: y_train})
    writer.add_summary(train_merge, global_step=global_step)
Here is the metadata.tsv file.
Please let me know what I missed.
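One thing that may be missing here (an assumption, not verified against this setup): TensorBoard only shows the Projector tab when it finds, in the log directory it is reading, both the projector_config.pbtxt and a checkpoint containing the embedding variable. The checkpoints above are saved to modelfile\, not logs. A sketch of saving such a checkpoint from inside the session:
import os

# Save a checkpoint containing embed_matrix into the same 'logs' folder
# that the FileWriter and projector_config.pbtxt use, so TensorBoard
# (started with: tensorboard --logdir logs) can find the embedding values.
projector_saver = tf.train.Saver([embed_matrix])
projector_saver.save(sess, os.path.join('logs', 'embed.ckpt'), global_step=global_step)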
I am trying to build a chatbot in python using tensorflow and tensorlayer. My code is the following:
from sklearn.feature_extraction.text import CountVectorizer
from nltk.tokenize import word_tokenize
import os
import time
import re
import tensorflow as tf
import tensorlayer as tl
import cPickle as pickle
FILE_DIR = os.path.dirname(os.path.realpath(__file__))
PAD_TOKEN = '<PAD>'
START_TOKEN = '<START>'
END_TOKEN = '<END>'
UNK_TOKEN = '<UNK>'
PAD_ID = 0
START_ID = 1
END_ID = 2
UNK_ID = 3
STARTING_VOCAB = {PAD_TOKEN: PAD_ID, START_TOKEN: START_ID, END_TOKEN: END_ID, UNK_TOKEN: UNK_ID}
_DIGIT_RE = re.compile(br"\d")
class Chatbot(object):
    def __init__(self, embedding_dim, n_layers=3):
        self.embedding_dim = embedding_dim
        self.n_layers = n_layers
        self.w2idx = STARTING_VOCAB
        self.idx2w = {}
        self.encode_seqs = None
        self.decode_seqs = None
        self.net = None
        self.net_rnn = None
        self.y = None
    @staticmethod
    def load():
        with open(os.path.join(location, 'object.pkl'), 'rb') as pickle_file:
            obj = pickle.load(pickle_file)
        obj.encode_seqs = tf.placeholder(dtype=tf.int64, shape=[1, None], name="encode_seqs")
        obj.decode_seqs = tf.placeholder(dtype=tf.int64, shape=[1, None], name="decode_seqs")
        obj.net, obj.net_rnn = Chatbot.model(obj.encode_seqs, obj.decode_seqs, obj.idx2w, obj.embedding_dim, obj.n_layers, is_train=False, reuse=True)
        obj.y = tf.nn.softmax(obj.net.outputs)
        new_saver = tf.train.import_meta_graph(os.path.join(location, 'my-model.meta'))
        new_saver.restore(sess, tf.train.latest_checkpoint(location))
        return obj
    @staticmethod
    def model(encode_seqs, decode_seqs, idx2w, embedding_dim, n_layers, is_train=True, reuse=False):
        with tf.variable_scope("model", reuse=reuse):
            # for a chatbot you can use the same embedding layer;
            # for translation you may want two separate embedding layers
            with tf.variable_scope("embedding") as vs:
                net_encode = tl.layers.EmbeddingInputlayer(inputs=encode_seqs,
                                                           vocabulary_size=len(idx2w),
                                                           embedding_size=embedding_dim,
                                                           name='seq_embedding')
                vs.reuse_variables()
                tl.layers.set_name_reuse(True)
                net_decode = tl.layers.EmbeddingInputlayer(inputs=decode_seqs,
                                                           vocabulary_size=len(idx2w),
                                                           embedding_size=embedding_dim,
                                                           name='seq_embedding')
            net_rnn = tl.layers.Seq2Seq(net_encode,
                                        net_decode,
                                        cell_fn=tf.contrib.rnn.BasicLSTMCell,
                                        n_hidden=embedding_dim,
                                        initializer=tf.random_uniform_initializer(-0.1, 0.1),
                                        encode_sequence_length=tl.layers.retrieve_seq_length_op2(encode_seqs),
                                        decode_sequence_length=tl.layers.retrieve_seq_length_op2(decode_seqs),
                                        initial_state_encode=None,
                                        dropout=(0.5 if is_train else None),
                                        n_layer=n_layers,
                                        return_seq_2d=True, name='seq2seq')
            net_out = tl.layers.DenseLayer(net_rnn, n_units=len(idx2w), act=tf.identity, name='output')
        return net_out, net_rnn
    def train(self, X_train, y_train, sess, batch_size, n_epochs):
        n_step = int(len(X_train) / batch_size)
        # Create vocabulary
        X_train = [re.sub(_DIGIT_RE, UNK_TOKEN, x.decode('utf-8')) for x in X_train]
        y_train = [re.sub(_DIGIT_RE, UNK_TOKEN, x.decode('utf-8')) for x in y_train]
        vectorizer = CountVectorizer(tokenizer=word_tokenize)
        all_sentences = X_train + y_train
        vectorizer.fit_transform(all_sentences)
        for k, v in vectorizer.vocabulary_.iteritems():
            vectorizer.vocabulary_[k] = v + len(self.w2idx)
        self.w2idx.update(vectorizer.vocabulary_)
        self.idx2w = dict((v, k) for k, v in self.w2idx.iteritems())
        # Transform data from sentences to sequences of ids
        for i in range(len(X_train)):
            X_train_id_seq, y_train_id_seq = [], []
            for w in word_tokenize(X_train[i]):
                if w.lower() in self.w2idx:
                    X_train_id_seq.append(self.w2idx[w.lower()])
                else:
                    X_train_id_seq.append(self.w2idx[UNK_TOKEN])
            X_train[i] = X_train_id_seq + [PAD_ID]
            for w in word_tokenize(y_train[i]):
                if w.lower() in self.w2idx:
                    y_train_id_seq.append(self.w2idx[w.lower()])
                else:
                    y_train_id_seq.append(self.w2idx[UNK_TOKEN])
            y_train[i] = y_train_id_seq + [PAD_ID]
        training_encode_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="encode_seqs")
        training_decode_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="decode_seqs")
        training_target_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="target_seqs")
        training_target_mask = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="target_mask")
        training_net_out, _ = Chatbot.model(training_encode_seqs, training_decode_seqs, self.idx2w, self.embedding_dim, self.n_layers, is_train=True, reuse=False)
        # model for inferencing
        self.encode_seqs = tf.placeholder(dtype=tf.int64, shape=[1, None], name="encode_seqs")
        self.decode_seqs = tf.placeholder(dtype=tf.int64, shape=[1, None], name="decode_seqs")
        self.net, self.net_rnn = Chatbot.model(self.encode_seqs, self.decode_seqs, self.idx2w, self.embedding_dim, self.n_layers, is_train=False, reuse=True)
        self.y = tf.nn.softmax(self.net.outputs)
        loss = tl.cost.cross_entropy_seq_with_mask(logits=training_net_out.outputs, target_seqs=training_target_seqs,
                                                   input_mask=training_target_mask, return_details=False, name='cost')
        lr = 0.0001
        train_op = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss)
        tl.layers.initialize_global_variables(sess)
        for epoch in range(n_epochs):
            epoch_time = time.time()
            # shuffle training data
            from sklearn.utils import shuffle
            X_train, y_train = shuffle(X_train, y_train, random_state=0)
            # train an epoch
            total_err, n_iter = 0, 0
            for X, Y in tl.iterate.minibatches(inputs=X_train, targets=y_train, batch_size=batch_size, shuffle=False):
                step_time = time.time()
                X = tl.prepro.pad_sequences(X)
                _target_seqs = tl.prepro.sequences_add_end_id(Y, end_id=END_ID)
                _target_seqs = tl.prepro.pad_sequences(_target_seqs)
                _decode_seqs = tl.prepro.sequences_add_start_id(Y, start_id=START_ID, remove_last=False)
                _decode_seqs = tl.prepro.pad_sequences(_decode_seqs)
                _target_mask = tl.prepro.sequences_get_mask(_target_seqs)
                _, err = sess.run([train_op, loss],
                                  {training_encode_seqs: X,
                                   training_decode_seqs: _decode_seqs,
                                   training_target_seqs: _target_seqs,
                                   training_target_mask: _target_mask})
                print("Epoch[%d/%d] step:[%d/%d] loss:%f took:%.5fs" % (
                    epoch, n_epochs, n_iter, n_step, err, time.time() - step_time))
                total_err += err
                n_iter += 1
            print("Epoch[%d/%d] averaged loss:%f took:%.5fs" % (epoch, n_epochs, total_err / n_iter,
                                                                time.time() - epoch_time))
    def save(self):
        if not os.path.exists(location):
            os.makedirs(location)
        saver = tf.train.Saver()
        saver.save(sess, os.path.join(location, 'my-model'))
        tf.train.write_graph(sess.graph, location, 'my-graph.pbtxt')
        self.net = None
        self.net_rnn = None
        self.y = None
        self.encode_seqs = None
        self.decode_seqs = None
        with open(os.path.join(location, 'object.pkl'), 'wb') as pickle_file:
            pickle.dump(self, pickle_file)
Training works perfectly fine: I pass a list of sentences as X_train and another as y_train. What I need help with is saving the model and then reloading it later for training or testing. I tried just using pickle, but it gives an error. How can I save and load seq2seq models in Python using TensorFlow and TensorLayer?
Pickling the object is not the best way to save your model.
After you finish training, use
saver = tf.train.Saver()
saver.save(sess, model_name)
and then load it:
saver = tf.train.Saver()
sess = tf.Session()
saver.restore(sess = sess, save_path='sentiment_analysis_tsm')
In order to load it, you'll have to first build a model equivalent to the one you trained.
To save the graph object, try using tf.train.write_graph
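Put together for the Chatbot class above, a sketch of that rebuild-then-restore pattern might look like this. The constructor arguments and checkpoint path are assumptions: use the values from your own training run (location is the directory used in save()), and restore the vocabulary (w2idx/idx2w) separately before building the graph, since the Seq2Seq layers size themselves from it:
import os
import tensorflow as tf

# Rebuild the same graph with the same arguments used at training time.
bot = Chatbot(embedding_dim=1024, n_layers=3)  # use your actual training values
# ... restore bot.w2idx / bot.idx2w here before building the graph ...
bot.encode_seqs = tf.placeholder(dtype=tf.int64, shape=[1, None], name="encode_seqs")
bot.decode_seqs = tf.placeholder(dtype=tf.int64, shape=[1, None], name="decode_seqs")
bot.net, bot.net_rnn = Chatbot.model(bot.encode_seqs, bot.decode_seqs, bot.idx2w,
                                     bot.embedding_dim, bot.n_layers,
                                     is_train=False, reuse=False)
bot.y = tf.nn.softmax(bot.net.outputs)

# Then restore the trained weights into the rebuilt graph.
sess = tf.Session()
saver = tf.train.Saver()
saver.restore(sess=sess, save_path=os.path.join(location, 'my-model'))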