I have written training code for language translation. Now, while testing the model, I am running into an issue. It should just be a matter of adjusting the X and Y values for the test input, but I am a bit confused.
This is the code for training the model:
def train_seq2seq(self):
    print("Input sequence read, starting training")
    s2s = seq2seq(self.vocab_size + 3, self.maxlen + 2,
                  self.vocab_size + 3)
    self.model = s2s.seq2seq_plain()
    # For testing, running only 10 epochs instead of 10000
    for e in range(10):
        print("epoch %d \n" % e)
        for ind, (X, Y) in enumerate(self.proproces.gen_batch()):
            loss, acc = self.model.train_on_batch(X, Y)  # batch_size=64, nb_epoch=1
            # print("Loss is %f, accuracy is %f " % (loss, acc), end='\r')
            # Every 10 batches, test on one sentence
            if ind % 10 == 0:
                testX = X[0, :].reshape(1, self.maxlen + 2)
                testY = Y[0]
                pred = self.model.predict(testX, batch_size=1)
                self.decode(testX, pred)
The test code in which I am facing the issue is:
def encode(self):
    # Encodes the input sentence into a fixed-length vector
    # print("Enter sentence in hindi")
    inp = raw_input("Please enter the sentence\n").decode("utf-8")
    tokens = inp.split()
    seq = []
    for token in tokens:
        if token in self.proproces.vocab_tar:
            seq.append(self.proproces.vocab_tar[token])
        else:
            token = "UNK"
            seq.append(self.proproces.vocab_tar[token])
    # seq = map(lambda x: self.proproces.vocab_hind[x], tokens)
    # Normalize seq to maxlen
    X = []
    X.append(seq)
    print(X)  # [[400, 23, 400]]
    temp = pad_sequences(X, maxlen=self.maxlen)
    print(temp.shape)  # (1, 6)
    temp[0:len(seq)] = seq
    # print(len(temp))
    # temp = np.asarray(temp).reshape(128,)
    # print(temp.shape)
    prob = self.model.predict_on_batch(temp)  # batch_size=1, verbose=0
    translated = self.decode(prob)
    print("Translated is", translated)
    print("Probabilities are", prob)
    print("Shape of prob tensor is", prob.shape)
I am referring to this tutorial: https://github.com/shashankg7/Seq2Seq/blob/master/seq2seq/seq2seq.py
I am confused about adjusting the shapes of X and Y; any guidance is much appreciated.
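For what it's worth, a minimal sketch of how I would shape the test input so it matches what the training loop already does (testX has shape (1, self.maxlen + 2), and decode is called with both testX and the prediction). The names self.proproces.vocab_tar, pad_sequences, self.model and self.decode come from the code above; encode_and_predict itself is just a hypothetical helper:

def encode_and_predict(self, sentence):
    tokens = sentence.split()
    # map each token to its id, falling back to the UNK id, exactly as in encode()
    seq = []
    for token in tokens:
        if token not in self.proproces.vocab_tar:
            token = "UNK"
        seq.append(self.proproces.vocab_tar[token])
    # pad to the same width the model was trained on: maxlen + 2, not maxlen
    X = pad_sequences([seq], maxlen=self.maxlen + 2)   # shape (1, self.maxlen + 2)
    pred = self.model.predict(X, batch_size=1)
    return self.decode(X, pred)

The key point is that the model was built with seq2seq(self.vocab_size + 3, self.maxlen + 2, ...), so the test input has to be padded to maxlen + 2 rather than maxlen.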
Please, I am new to NMT. I hard-coded a model to translate English to my dialect (Tiv). The model has worked perfectly, but I am having trouble feeding user input to the model for actual translation. Below is the source of the model evaluation. Your assistance will be really appreciated.
from pickle import load
from numpy import array, argmax
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.models import load_model
from nltk.translate.bleu_score import corpus_bleu

# load a clean dataset
def load_clean_sentences(filename):
    return load(open(filename, 'rb'))

# load datasets
dataset = load_clean_sentences('english-tiv-both.pkl')
train = load_clean_sentences('english-tiv-train.pkl')
test = load_clean_sentences('english-tiv-test.pkl')

# fit a tokenizer
def create_tokenizer(lines):
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(lines)
    return tokenizer

# max sentence length
def max_length(lines):
    return max(len(line.split()) for line in lines)

# prepare english tokenizer
eng_tokenizer = create_tokenizer(dataset[:, 0])
eng_vocab_size = len(eng_tokenizer.word_index) + 1
eng_length = max_length(dataset[:, 0])

# prepare tiv tokenizer
tiv_tokenizer = create_tokenizer(dataset[:, 1])
tiv_vocab_size = len(tiv_tokenizer.word_index) + 1
tiv_length = max_length(dataset[:, 1])

# encode and pad sequences
def encode_sequences(tokenizer, length, lines):
    # integer encode sequences
    X = tokenizer.texts_to_sequences(lines)
    # pad sequences with 0 values
    X = pad_sequences(X, maxlen=length, padding='post')
    return X

# one hot encode target sequence
def encode_output(sequences, vocab_size):
    ylist = list()
    for sequence in sequences:
        encoded = to_categorical(sequence, num_classes=vocab_size)
        ylist.append(encoded)
    y = array(ylist)
    return y

# prepare data
trainX = encode_sequences(tiv_tokenizer, tiv_length, train[:, 1])
testX = encode_sequences(tiv_tokenizer, tiv_length, test[:, 1])

# load model
model = load_model('model.h5')
# map an integer to a word
def word_for_id(integer, tokenizer):
    for word, index in tokenizer.word_index.items():
        if index == integer:
            return word
    return None

# generate target given source sequence
def predict_sequence(model, tokenizer, source):
    prediction = model.predict(source, verbose=0)[0]
    integers = [argmax(vector) for vector in prediction]
    target = list()
    for i in integers:
        word = word_for_id(i, tokenizer)
        if word is None:
            break
        target.append(word)
    return ' '.join(target)

# evaluate the skill of the model
def evaluate_model(model, tokenizer, sources, raw_dataset):
    actual, predicted = list(), list()
    for i, source in enumerate(sources):
        # translate encoded source text
        source = source.reshape((1, source.shape[0]))
        translation = predict_sequence(model, tiv_tokenizer, source)
        raw_target, raw_src = raw_dataset[i]
        if i < 10:
            print('src=[%s], target=[%s], predicted=[%s]' % (raw_src, raw_target, translation))
        actual.append(raw_target.split())
        predicted.append(translation.split())
    # calculate BLEU score
    print('BLEU-1: %f' % corpus_bleu(actual, predicted, weights=(1.0, 0, 0, 0)))
    print('BLEU-2: %f' % corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0, 0)))
    print('BLEU-3: %f' % corpus_bleu(actual, predicted, weights=(0.3, 0.3, 0.3, 0)))
    print('BLEU-4: %f' % corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25)))

# test on some training sequences
print('train')
evaluate_model(model, eng_tokenizer, trainX, train)

# test on some test sequences
print('test')
evaluate_model(model, eng_tokenizer, testX, test)
I want an interface where users can input a sentence in English and get the output in the Tiv language using the trained model. My presentation is coming up on Monday and I am stuck at the moment.
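Not a full answer, but one way to get a quick console interface on top of the functions already defined above (encode_sequences, predict_sequence, the two tokenizers and the loaded model). I am assuming the model was trained with English as the source side, as described; if it is actually the other way around (the evaluation code above encodes sources with tiv_tokenizer), swap the eng_/tiv_ tokenizers and lengths. translate_sentence is a hypothetical helper:

def translate_sentence(model, sentence):
    # integer-encode and pad the raw sentence the same way the training data was encoded
    source = encode_sequences(eng_tokenizer, eng_length, [sentence])  # shape (1, eng_length)
    return predict_sequence(model, tiv_tokenizer, source)             # decode predicted ids into Tiv words

while True:
    line = input('English sentence (blank to quit): ').strip()
    if not line:
        break
    print('Tiv:', translate_sentence(model, line))

If the dataset was cleaned before tokenizing (lowercased, punctuation stripped), the user input should be cleaned the same way before calling encode_sequences.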
My dataset is only 10 thousand sentences. I run it in batches of 100 and clear the memory on each run. I manually slice the sentences to only 50 characters. After running for 32 minutes it crashes, on Google Colab with 25 GB of RAM.
I must be doing something terribly wrong.
I'm using the out-of-the-box model and tokenizer.
def eval_model(model, tokenizer_, X, y, batchsize, maxlength):
    assert len(X) == len(y)
    labels = ["negative", "positive"]
    correctCounter = 0
    epochs = int(np.ceil(len(dev_sent) / batchsize))
    accuracies = []
    for i in range(epochs):
        print(f"Epoch {i}")
        # slicing up the data into batches
        X_ = X[i:((i+1)*100)]
        X_ = [x[:maxlength] for x in X_]  # make sure sentences are only of maxlength
        y_ = y[i:((i+1)*100)]
        encoded_input = tokenizer(X_, return_tensors='pt', padding=True, truncation=True, max_length=maxlength)
        output = model(**encoded_input)
        scores = output[0][0].detach().numpy()
        scores = softmax(scores)
        for i, scores in enumerate([softmax(logit) for logit in output["logits"].detach().numpy()]):
            print("--------------------")
            print(f'Sentence no. {len(accuracies)+1}')
            print("Sentence: " + X_[i])
            print("Score: " + str(scores))
            ranking = np.argsort(scores)
            print(f"Ranking: {ranking}")
            pred = labels[np.argmax(np.argsort(scores))]
            print(f"Prediction: {pred}, annotation: {y_[i]}")
            if pred == y_[i]:
                print("SUCCESS!")
                correctCounter += 1
            else:
                print("FAILURE!")
        # garbage collection (to not run out of RAM... which it shouldn't, it's just a few KB, but it does.... ?!)
        del(encoded_input)
        del(output)
        del(scores)
        gc.collect()
        accuracies.append(correctCounter / len(y_))
        # print(f"current accuracy: {np.mean(np.asarray(accuracies))}")
    return np.mean(np.asarray(accuracies))
task='sentiment'
MODEL = f"distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
model.save_pretrained(MODEL)
tokenizer.save_pretrained(MODEL)
accuracy = eval_model(model, tokenizer, dev_sent, dev_sentiment, 100, 50)
EDIT: here is the code on google colab https://colab.research.google.com/drive/1qKTabPTNYWEILoua0gIvlYDly8jcibr0?usp=sharing
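I can't tell from here exactly what blows up the RAM, but two things in eval_model are worth ruling out: the forward pass runs with autograd enabled (no torch.no_grad()), so every batch keeps activation graphs around until the del calls, and the batch slicing uses X[i:((i+1)*100)] while i steps by 1, so the slices overlap and grow towards the full dataset. A leaner sketch under those assumptions; it also takes the argmax of the scores directly rather than labels[np.argmax(np.argsort(scores))], and it assumes softmax comes from scipy.special:

import numpy as np
import torch
from scipy.special import softmax

def eval_model(model, tokenizer, X, y, batchsize, maxlength):
    assert len(X) == len(y)
    labels = ["negative", "positive"]
    model.eval()                                    # inference mode
    correct = 0
    n_batches = int(np.ceil(len(X) / batchsize))
    for b in range(n_batches):
        X_ = X[b * batchsize:(b + 1) * batchsize]   # consistent, non-overlapping slices
        y_ = y[b * batchsize:(b + 1) * batchsize]
        enc = tokenizer(list(X_), return_tensors='pt', padding=True,
                        truncation=True, max_length=maxlength)
        with torch.no_grad():                       # don't build or keep autograd graphs
            logits = model(**enc).logits
        for scores, gold in zip(logits.numpy(), y_):
            pred = labels[int(np.argmax(softmax(scores)))]
            correct += int(pred == gold)
    return correct / len(y)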
I am trying to set things up so that dropout is computed only during training, but somehow the model doesn't seem to see the dropout layer: when I modify the drop probabilities, nothing happens. I suspect it's a logic issue in my code, but I can't spot where. Also, I'm relatively new to this world, so please bear with my inexperience. Any help will be much appreciated.
Here's the code. I first create a Boolean placeholder
Train = tf.placeholder(tf.bool,shape=())
which will then be passed in the feed dictionary as True (training) or False (test). Then I implement the forward propagation as follows.
def forward_prop_cost(X, parameters, string, drop_probs, Train):
    """
    Implements the forward propagation for the model: LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SOFTMAX

    Arguments:
    X -- input dataset placeholder, of shape (input size, number of examples)
    parameters -- python dictionary containing your parameters "W1", "b1", ...
    string -- 'ReLU' or 'tanh'
    drop_probs -- drop probabilities for each layer; first and last == 0
    Train -- boolean placeholder

    Returns:
    ZL -- the output of the last LINEAR unit
    """
    L = len(drop_probs) - 1
    activations = []
    activations.append(X)
    if string == 'ReLU':
        for i in range(1, L):
            Zi = tf.matmul(parameters['W' + str(i)], activations[i - 1]) + parameters['b' + str(i)]
            if (Train == True and drop_probs[i] != 0):
                Ai = tf.nn.dropout(tf.nn.relu(Zi), drop_probs[i])
            else:
                Ai = tf.nn.relu(Zi)
            activations.append(Ai)
    elif string == 'tanh':  # needs update!
        for i in range(1, L):
            Zi = tf.matmul(parameters['W' + str(i)], activations[i - 1]) + parameters['b' + str(i)]
            Ai = tf.nn.dropout(tf.nn.tanh(Zi), drop_probs[i])
            activations.append(Ai)
    ZL = tf.matmul(parameters['W' + str(L)], activations[L - 1]) + parameters['b' + str(L)]
    logits = tf.transpose(ZL)
    labels = tf.transpose(Y)
    return ZL
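As far as I can tell, in TF1 graph mode the expression `Train == True` is evaluated once in Python while the graph is being built (and, since Train is a placeholder object rather than a value, it evaluates to False), so only the no-dropout branch ever makes it into the graph. One way to let the graph decide at run time is tf.layers.dropout with a training tensor, or tf.cond. A minimal sketch under those assumptions, reusing the Zi / drop_probs / Train names from above; relu_layer_with_dropout is a hypothetical helper. Note also that tf.nn.dropout in TF1 takes keep_prob (probability of keeping a unit), whereas tf.layers.dropout takes rate (probability of dropping), which matches the drop_probs naming above.

import tensorflow as tf

def relu_layer_with_dropout(Zi, drop_prob, Train):
    # Train is a tf.bool placeholder; tf.layers.dropout inserts the run-time switch for us
    Ai = tf.nn.relu(Zi)
    if drop_prob == 0:            # drop_prob is a plain Python float, so an ordinary `if` is fine here
        return Ai
    return tf.layers.dropout(Ai, rate=drop_prob, training=Train)

# roughly equivalent with tf.cond, closer to the original structure:
#   Ai = tf.cond(Train,
#                lambda: tf.nn.dropout(tf.nn.relu(Zi), keep_prob=1.0 - drop_prob),
#                lambda: tf.nn.relu(Zi))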
Then I call the model function, where at the end I pass Train as True or False, depending on which dataset I'm using.
def model(X_train, Y_train, X_test, Y_test, hidden=[12288, 25, 12, 6], string='ReLU', drop_probs=[0., 0.4, 0.2, 0.],
          regular_param=0.0, starter_learning_rate=0.0001,
          num_epochs=1500, minibatch_size=32, print_cost=True, learning_decay=False):
    '''
    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    '''
    ops.reset_default_graph()
    tf.set_random_seed(1)
    seed = 3
    (n_x, m) = X_train.shape          # n_x: input size, m: number of examples in the train set
    n_y = Y_train.shape[0]            # n_y: output size
    costs = []                        # to keep track of the cost
    graph = tf.Graph()

    X, Y, Train = create_placeholders(n_x, n_y)
    parameters = initialize_parameters(hidden)
    # print([n.name for n in tf.get_default_graph().as_graph_def().node])

    ZL = forward_prop_cost(X, parameters, 'ReLU', drop_probs, Train)
    # cost = forward_prop_cost(X, parameters, 'ReLU', drop_probs, regular_param)
    cost = compute_cost(ZL, Y, parameters, regular_param)

    # optimizer = tf.train.AdamOptimizer(learning_rate=starter_learning_rate).minimize(cost)
    if learning_decay == True:
        increasing = tf.Variable(0, trainable=False)
        learning_rate = tf.train.exponential_decay(starter_learning_rate, increasing * minibatch_size, m, 0.95, staircase=True)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost, global_step=increasing)
    else:
        optimizer = tf.train.AdamOptimizer(learning_rate=starter_learning_rate).minimize(cost)

    # Initialize all the variables
    init = tf.global_variables_initializer()

    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:
        # Run the initialization
        sess.run(init, {Train: True})

        # Do the training loop
        for epoch in range(num_epochs):
            epoch_cost = 0.
            num_minibatches = int(m / minibatch_size)
            seed = seed + 1
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)
            for minibatch in minibatches:
                (minibatch_X, minibatch_Y) = minibatch
                _, minibatch_cost = sess.run([optimizer, cost], feed_dict={X: minibatch_X, Y: minibatch_Y})
                epoch_cost += minibatch_cost / num_minibatches
            # Print the cost every 100 epochs
            if print_cost == True and epoch % 100 == 0:
                print("Cost after epoch %i: %f" % (epoch, epoch_cost))
            if print_cost == True and epoch % 5 == 0:
                costs.append(epoch_cost)

        # plot the cost
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations (per fives)')
        plt.title("Learning rate = " + str(starter_learning_rate))
        plt.show()

        parameters = sess.run(parameters)
        print("Parameters have been trained!")

        # Calculate accuracy on the test set
        correct_prediction = tf.equal(tf.argmax(ZL), tf.argmax(Y))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        print("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train, Train: True}))
        print("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test, Train: False}))

        return parameters
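One more thing to double-check once the dropout ops actually depend on the Train placeholder (for example via tf.layers.dropout or tf.cond, as sketched above): every sess.run that evaluates them then needs Train in its feed_dict, including the minibatch training step. Purely illustrative, using the X, Y, Train, optimizer and cost names from the loop above:

_, minibatch_cost = sess.run([optimizer, cost],
                             feed_dict={X: minibatch_X, Y: minibatch_Y, Train: True})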
I'm working on an implementation of the word2vec architecture from scratch, but my model doesn't converge.
class SkipGramBatcher:
    def __init__(self, text):
        self.text = text.results

    def get_batches(self, batch_size):
        n_batches = len(self.text) // batch_size
        pairs = []
        for idx in range(0, len(self.text)):
            window_size = 5
            idx_neighbors = self._get_neighbors(self.text, idx, window_size)
            # one_hot_idx = self._to_one_hot(idx)
            # idx_pairs = [(one_hot_idx, self._to_one_hot(idx_neighbor)) for idx_neighbor in idx_neighbors]
            # note: idx here is the position in self.text, while idx_neighbor values come from set(text[start:end]), i.e. word ids
            idx_pairs = [(idx, idx_neighbor) for idx_neighbor in idx_neighbors]
            pairs.extend(idx_pairs)
        for idx in range(0, len(pairs), batch_size):
            X = [pair[0] for pair in pairs[idx:idx + batch_size]]
            Y = [pair[1] for pair in pairs[idx:idx + batch_size]]
            yield X, Y

    def _get_neighbors(self, text, idx, window_size):
        text_length = len(text)
        start = max(idx - window_size, 0)
        end = min(idx + window_size + 1, text_length)
        neighbors_words = set(text[start:end])  # note: this slice also contains text[idx] itself
        return list(neighbors_words)

    def _to_one_hot(self, indexes):
        n_values = np.max(indexes) + 1
        return np.eye(n_values)[indexes]
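In case it helps with checking shapes, a tiny usage sketch of the batcher above; it assumes preprocessed_text is the object whose .results attribute holds the flat list of integer word ids (which is what self.text = text.results suggests):

skip_gram_batcher = SkipGramBatcher(preprocessed_text)

for batch_x, batch_y in skip_gram_batcher.get_batches(64):
    # batch_x: first elements of the pairs, batch_y: second elements,
    # both plain Python lists of length <= 64
    print(len(batch_x), batch_x[:5])
    print(len(batch_y), batch_y[:5])
    break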
I use the text8 corpus and have applied preprocessing techniques such as stemming, lemmatization and subsampling. I've also excluded English stop words and limited the vocabulary:
vocab_size = 20000
text_len = len(text)
test_text_len = int(text_len*0.15)
preprocessed_text = PreprocessedText(text,vocab_size)
I use TensorFlow for the graph computation:
train_graph = tf.Graph()
with train_graph.as_default():
    inputs = tf.placeholder(tf.int32, [None], name='inputs')
    labels = tf.placeholder(tf.int32, [None, None], name='labels')

n_embedding = 300
with train_graph.as_default():
    embedding = tf.Variable(tf.random_uniform((vocab_size, n_embedding), -1, 1))
    embed = tf.nn.embedding_lookup(embedding, inputs)
And apply negative sampling
# Number of negative labels to sample
n_sampled = 100
with train_graph.as_default():
    softmax_w = tf.Variable(tf.truncated_normal((vocab_size, n_embedding)))  # softmax weight matrix
    softmax_b = tf.Variable(tf.zeros(vocab_size), name="softmax_bias")       # softmax biases
    # Calculate the loss using negative sampling
    loss = tf.nn.sampled_softmax_loss(
        weights=softmax_w,
        biases=softmax_b,
        labels=labels,
        inputs=embed,
        num_sampled=n_sampled,
        num_classes=vocab_size)
    cost = tf.reduce_mean(loss)
    optimizer = tf.train.AdamOptimizer().minimize(cost)
Finally I train my model
epochs = 10
batch_size = 64
avg_loss = []

with train_graph.as_default():
    saver = tf.train.Saver()

with tf.Session(graph=train_graph) as sess:
    iteration = 1
    loss = 0
    sess.run(tf.global_variables_initializer())
    for e in range(1, epochs + 1):
        batches = skip_gram_batcher.get_batches(batch_size)
        start = time.time()
        for batch_x, batch_y in batches:
            feed = {inputs: batch_x,
                    labels: np.array(batch_y)[:, None]}
            train_loss, _ = sess.run([cost, optimizer], feed_dict=feed)
            loss += train_loss

            if iteration % 100 == 0:
                end = time.time()
                print("Epoch {}/{}".format(e, epochs),
                      "Iteration: {}".format(iteration),
                      "Avg. Batch loss: {:.4f}".format(loss / iteration),
                      "{:.4f} sec/batch".format((end - start) / 100))
                # loss = 0
                avg_loss.append(loss / iteration)
                start = time.time()
            iteration += 1
    save_path = saver.save(sess, "checkpoints/text8.ckpt")
But after running this model, my average batch loss doesn't decrease dramatically.
I guess I must have made a mistake somewhere. Any help is appreciated.
What makes you say "my average batch loss doesn't decrease dramatically"? The graph you've attached shows some (unlabeled) value decreasing significantly, and still decreasing at a strong slope towards the end of the data.
"Convergence" would show up as the improvement-in-loss first slowing, then stopping.
But if your loss is still noticeably dropping, just keep training! Using more epochs can be especially important on small datasets – like the tiny text8 you're using.
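One detail worth noting when judging the curve: in the training loop above, `loss` is accumulated from iteration 1 and divided by `iteration`, and the reset (`# loss = 0`) is commented out, so the printed "Avg. Batch loss" is a running average over the whole run and will always look smoother and flatter than the per-batch loss. A small, purely illustrative tweak that reports only the last 100 batches instead:

if iteration % 100 == 0:
    end = time.time()
    print("Epoch {}/{}".format(e, epochs),
          "Iteration: {}".format(iteration),
          "Avg. batch loss (last 100): {:.4f}".format(loss / 100),
          "{:.4f} sec/batch".format((end - start) / 100))
    avg_loss.append(loss / 100)
    loss = 0      # reset the accumulator so the next report covers only recent batches
    start = time.time()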
I have been working on a vanilla char RNN in TensorFlow. I am not able to get it to produce anything sensible even after training it for a couple of hours. The code is a TF version of the Keras code from Chollet's Deep Learning with Python (GitHub).
I tried playing around with hyperparameters without much success. Chollet mentioned in the book that the model produced good output after 80 epochs. I haven't been able to get anything reasonable after 50K+ epochs :( Curious if there is something I missed while converting this code to TensorFlow.
n_layers = 1
num_units = 128
batch_size = 150
X = tf.placeholder(tf.float32, [None, maxlen, len(unique_chars)], name="Placeholder_X")
y = tf.placeholder(tf.int64, [None, len(unique_chars)], name="Placeholder_Y")
lstm_cells = [tf.contrib.rnn.BasicLSTMCell(num_units=num_units) for layer in range(n_layers)]
multi_cell = tf.contrib.rnn.MultiRNNCell(lstm_cells)
outputs, current_state = tf.nn.dynamic_rnn(multi_cell, X, dtype=tf.float32)
top_layer_h_state = current_state[-1][1]
logits = tf.layers.dense(top_layer_h_state, len(unique_chars), name="softmax")
xentropy=tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=y)
loss = tf.reduce_mean(xentropy, name="loss")
optimizer = tf.train.RMSPropOptimizer(learning_rate=0.001)
training_op = optimizer.minimize(loss)
pred = tf.nn.softmax(logits)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
Sampling Code:
with tf.Session() as sess:
    init.run()
    saver.restore(sess, model_name)
    # Output some data
    start_index = random.randint(0, len(text) - maxlen - 1)
    generated_text = text[start_index: start_index + maxlen]
    print("Seed: ", generated_text)
    final_string = ""
    # note: allocated once; one-hot entries set in earlier iterations are never cleared
    sampled = np.zeros((1, maxlen, len(unique_chars)))
    for i in range(50):
        for t, char in enumerate(generated_text):
            sampled[0, t, char_to_idx[char]] = 1.
        preds_eval = sess.run([pred], feed_dict={X: sampled})
        preds = preds_eval[0][0]
        next_index = sample(preds, 0.5)
        next_char = unique_chars[next_index]
        generated_text += next_char
        final_string += next_char
        generated_text = generated_text[1:]
    print("New String: ", final_string)
Sample input seed: "is, as is generally acknowledged nowadays, no better sopori"
Input generation:
maxlen = 60
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i:i + maxlen])
    next_chars.append(text[i + maxlen])

unique_chars = sorted(list(set(text)))
char_to_idx = dict((char, unique_chars.index(char)) for char in unique_chars)

data_X = np.zeros((len(sentences), maxlen, len(unique_chars)), dtype=np.float32)
data_Y = np.zeros((len(sentences), len(unique_chars)), dtype=np.int64)
for idx, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        data_X[idx, t, char_to_idx[char]] = 1
    data_Y[idx, char_to_idx[next_chars[idx]]] = 1
Output from the model: vatsoéätlæéättire
It looks like you are trying to make a language model. I didn't read your entire code carefully, but just from the first part I noticed a couple of things. Why is your placeholder for X of type tf.float32 instead of integers? More importantly, why is the shape of y equal to batch size by vocab size? It should be batch_size by (max_len - 1) by vocab_size. In a language model you are always trying to predict the next character at every step; it's not a good way to train it to read a whole sequence of characters and then just predict one more at the end.
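To make this answer's suggestion concrete, here is a rough sketch of the per-timestep setup it describes, where each window of maxlen characters provides maxlen - 1 input/target pairs and the loss is averaged over every position. Names like maxlen, num_units and unique_chars are taken from the question; the rest (including rebuilding data_X/data_Y as shifted windows) is assumption, not the questioner's code:

import tensorflow as tf

# inputs are characters 0..maxlen-2 of each window, targets are characters 1..maxlen-1
X = tf.placeholder(tf.float32, [None, maxlen - 1, len(unique_chars)], name="X_seq")
y = tf.placeholder(tf.float32, [None, maxlen - 1, len(unique_chars)], name="Y_seq")

cell = tf.contrib.rnn.BasicLSTMCell(num_units=num_units)
outputs, _ = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)       # (batch, maxlen-1, num_units)
logits = tf.layers.dense(outputs, len(unique_chars))            # (batch, maxlen-1, vocab)

# cross-entropy at every timestep, averaged over batch and time
xentropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy)
training_op = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(loss)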