RNN: Get prediction from a text input after the model is trained - python

I am new to RNNs and I have been working on a small binary label classifier. I have been able to get a stable model with satisfactory results.
However, I am having a hard time using the model to classify new inputs and I was wondering if any of you could help me. Please see my code below for reference.
Thank you very much.
from tensorflow.keras import preprocessing
from sklearn.utils import shuffle
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras import models
from tensorflow.keras.layers import LSTM, Activation, Dense, Dropout, Input, Embedding
from tensorflow.keras.optimizers import RMSprop, Adam
from tensorflow.keras.preprocessing import sequence, text
from tensorflow.keras.callbacks import EarlyStopping
from matplotlib import pyplot

class tensor_rnn():
    def __init__(self, hidden_layers=3):
        self.data_path = 'C:\\Users\\cmazz\\PycharmProjects\\InvestmentAnalysis_2.0\\Sentiment\\Finance_Articles\\'
        # self.corp_paths = corpora_paths
        self.h_layers = hidden_layers
        self.num_words = []

        good = pd.read_csv(self.data_path + 'GoodO.csv')
        good['Polarity'] = 'pos'
        for line in good['Head'].tolist():
            counter = len(line.split())
            self.num_words.append(counter)

        bad = pd.read_csv(self.data_path + 'BadO.csv')
        bad['Polarity'] = 'neg'
        for line in bad['Head'].tolist():
            counter = len(line.split())
            self.num_words.append(counter)

        self.features = pd.concat([good, bad]).reset_index(drop=True)
        self.features = shuffle(self.features)
        self.max_len = len(max(self.features['Head'].tolist()))
        # self.train, self.test = train_test_split(features, test_size=0.33, random_state=42)

        X = self.features['Head']
        Y = self.features['Polarity']
        le = LabelEncoder()
        Y = le.fit_transform(Y)
        Y = Y.reshape(-1, 1)

        self.X_train, self.X_test, self.Y_train, self.Y_test = train_test_split(X, Y, test_size=0.30)

        self.tok = preprocessing.text.Tokenizer(num_words=len(self.num_words))
        self.tok.fit_on_texts(self.X_train)
        sequences = self.tok.texts_to_sequences(self.X_train)
        self.sequences_matrix = preprocessing.sequence.pad_sequences(sequences, maxlen=self.max_len)

    def RNN(self):
        inputs = Input(name='inputs', shape=[self.max_len])
        layer = Embedding(len(self.num_words), 30, input_length=self.max_len)(inputs)
        # layer = LSTM(64, return_sequences=True)(layer)
        layer = LSTM(32)(layer)
        layer = Dense(256, name='FC1')(layer)
        layer = Activation('relu')(layer)
        layer = Dropout(0.5)(layer)
        layer = Dense(1, name='out_layer')(layer)
        layer = Activation('sigmoid')(layer)
        model = Model(inputs=inputs, outputs=layer)
        return model

    def model_train(self):
        self.model = self.RNN()
        self.model.summary()
        self.model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])  # RMSprop()

    def model_test(self):
        self.history = self.model.fit(self.sequences_matrix, self.Y_train, batch_size=100, epochs=3,
                                      validation_split=0.30, callbacks=[EarlyStopping(monitor='val_loss', min_delta=0.0001)])
        test_sequences = self.tok.texts_to_sequences(self.X_test)
        test_sequences_matrix = sequence.pad_sequences(test_sequences, maxlen=self.max_len)
        accr = self.model.evaluate(test_sequences_matrix, self.Y_test)
        print('Test set\n  Loss: {:0.3f}\n  Accuracy: {:0.3f}'.format(accr[0], accr[1]))

if __name__ == "__main__":
    a = tensor_rnn()
    a.model_train()
    a.model_test()
    a.model.save('C:\\Users\\cmazz\\PycharmProjects\\'
                 'InvestmentAnalysis_2.0\\RNN_Model.h5',
                 include_optimizer=True)

    b = models.load_model('C:\\Users\\cmazz\\PycharmProjects\\'
                          'InvestmentAnalysis_2.0\\RNN_Model.h5')
    stringy = ['Fund managers back away from Amazon as they cut FANG exposure']
    prediction = b.predict(np.array(stringy))
    print(prediction)
When I run my code I get the following error:
ValueError: Error when checking input: expected inputs to have shape (39,) but got array with shape (1,)

Based on the ValueError and prediction = b.predict(np.array(stringy)), you need to tokenize and pad the input string the same way the training headlines were processed, so the model receives a (1, 39) integer matrix rather than a raw string array.
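A minimal sketch, assuming the trained tensor_rnn instance a (and with it the fitted tokenizer a.tok and a.max_len) is still in scope; in a fresh session the tokenizer would also need to be persisted and reloaded, e.g. with pickle:

stringy = ['Fund managers back away from Amazon as they cut FANG exposure']
# Map each word to the integer id learned from the training headlines
seqs = a.tok.texts_to_sequences(stringy)
# Pad to the length the Input layer expects, (None, 39) here
padded = preprocessing.sequence.pad_sequences(seqs, maxlen=a.max_len)
prediction = b.predict(padded)  # sigmoid output in [0, 1]
print(prediction)

Words the tokenizer never saw during fitting are silently dropped, so a headline made entirely of unseen words would come through as an all-zero row.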

Related

Layer model expects 3 input(s), but it received 1 input tensors - Error raised when trying to fit the model

I am building a simple cyberbullying detection model using BERT embeddings and TensorFlow. My dataset has two columns (label and text). It is a binary classification problem: 0 means a tweet is harassing and 1 means it is neutral.
You can find the altered_data dataset that I am using here. The original dataset can be found here.
Following the TensorFlow documentation for fine-tuning BERT, I have built the following model:
!git clone --depth 1 -b v2.3.0 https://github.com/tensorflow/models.git
!pip install -Uqr models/official/requirements.txt
After this, a runtime restart is required. Then the following code is executed.
import sys
sys.path.append('models')

import tensorflow as tf
import tensorflow_hub as tf_hub
import numpy as np
import pandas as pd
import sklearn

from official.nlp.data import classifier_data_lib
from official.nlp.bert import tokenization
from official.nlp import optimization
from sklearn.model_selection import train_test_split

df = pd.read_csv('altered_data.csv', encoding='utf-8')
train_df, remaining = train_test_split(df, random_state=42, train_size=0.90, stratify=df.sentiment.values)
valid_df, _ = train_test_split(remaining, random_state=42, train_size=0.90, stratify=remaining.sentiment.values)
train_df.shape, valid_df.shape

with tf.device('/cpu:0'):
    train_data = tf.data.Dataset.from_tensor_slices((train_df['sentiment'].values, train_df['tweet'].values))
    valid_data = tf.data.Dataset.from_tensor_slices((valid_df.sentiment.values, valid_df.tweet.values))

for label, text in train_data.take(1):
    print(label)
    print(text)
for label, text in valid_data.take(1):
    print(label)
    print(text)

label_list = [0, 1]
max_seq_length = 128
train_batch_size = 32

bert_layer = tf_hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/2", trainable=True)
vocab_file = bert_layer.resolved_object.vocab_file.asset_path.numpy()
do_lower_case = bert_layer.resolved_object.do_lower_case.numpy()
tokenizer = tokenization.FullTokenizer(vocab_file, do_lower_case)

def to_feature(text, label, label_list=label_list, max_seq_length=max_seq_length, tokenizer=tokenizer):
    example = classifier_data_lib.InputExample(guid=None,
                                               text_a=text.numpy(),
                                               text_b=None,
                                               label=label.numpy())
    feature = classifier_data_lib.convert_single_example(0, example, label_list, max_seq_length, tokenizer)
    return (feature.input_ids, feature.input_mask, feature.segment_ids, feature.label_id)

def to_feature_map(label, text):
    input_ids, input_mask, segment_ids, label_id = tf.py_function(to_feature, inp=[label, text],
                                                                  Tout=[tf.int32, tf.int32, tf.int32, tf.int32])
    input_ids.set_shape([max_seq_length])
    input_mask.set_shape([max_seq_length])
    segment_ids.set_shape([max_seq_length])
    label_id.set_shape([])
    x = {
        'input_word_ids': input_ids,
        'input_mask': input_mask,
        'input_type_ids': segment_ids
    }
    return (label, x)

with tf.device('/cpu:0'):
    train_data = (train_data.map(to_feature_map,
                                 num_parallel_calls=tf.data.experimental.AUTOTUNE)
                            .shuffle(1000)
                            .batch(32, drop_remainder=True)
                            .prefetch(tf.data.experimental.AUTOTUNE))
    valid_data = (valid_data.map(to_feature_map,
                                 num_parallel_calls=tf.data.experimental.AUTOTUNE)
                            .batch(32, drop_remainder=True)
                            .prefetch(tf.data.experimental.AUTOTUNE))
Train Data tensor spec
train_data.element_spec
Create model code:
def create_model():
    input_word_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype=tf.int32,
                                           name="input_word_ids")
    input_mask = tf.keras.layers.Input(shape=(max_seq_length,), dtype=tf.int32,
                                       name="input_mask")
    input_type_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype=tf.int32,
                                           name="input_type_ids")
    pooled_output, sequence_output = bert_layer([input_word_ids, input_mask, input_type_ids])
    drop = tf.keras.layers.Dropout(0.4)(pooled_output)
    output = tf.keras.layers.Dense(1, activation='sigmoid', name="output")(drop)
    model = tf.keras.Model(
        inputs={
            'input_word_ids': input_word_ids,
            'input_mask': input_mask,
            'input_type_ids': input_type_ids
        }, outputs=output)
    return model

model = create_model()
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=[tf.keras.metrics.BinaryAccuracy()])
model.summary()

epoch = 4
history = model.fit(train_data,
                    validation_data=valid_data,
                    epochs=epoch,
                    verbose=1)
This raises the error from the question title: Layer model expects 3 input(s), but it received 1 input tensors.
I am still a beginner in TensorFlow. To my understanding, there is a problem between the dropout and dense layers when they receive their inputs, but I can't figure out what needs to be done to fix it. Any help would be appreciated!
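One thing worth checking (a hedged suggestion based only on the code as posted): model.fit on a tf.data.Dataset expects each element as (features, label), but to_feature_map returns (label, x), which hands the model a single scalar where it expects the three BERT inputs. The call to tf.py_function also passes inp=[label, text] into to_feature(text, label, ...), so those two arguments arrive swapped. A sketch of the corrected mapping:

def to_feature_map(label, text):
    # Argument order in inp must match to_feature's signature (text, label)
    input_ids, input_mask, segment_ids, label_id = tf.py_function(
        to_feature, inp=[text, label],
        Tout=[tf.int32, tf.int32, tf.int32, tf.int32])
    input_ids.set_shape([max_seq_length])
    input_mask.set_shape([max_seq_length])
    segment_ids.set_shape([max_seq_length])
    label_id.set_shape([])
    x = {
        'input_word_ids': input_ids,
        'input_mask': input_mask,
        'input_type_ids': segment_ids
    }
    # Keras expects (features, label), not (label, features)
    return (x, label_id)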

GridSearchCV results are not reproducible

I'm new to Keras and I need your professional help.
I have used GridSearchCV to optimize my regression network. When I try to use the results, the newly created network performs far worse, in terms of mean squared error, than the one found by GridSearchCV.
The GridSearchCV code:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from time import time
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from tensorflow import keras
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, Dropout, LeakyReLU
from keras.utils import plot_model
from keras.optimizers import SGD, rmsprop, adam
from keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor
from keras.initializers import uniform, normal, glorot_uniform
from keras.losses import MAPE

# Data preprocessing
def get_data():
    data = pd.read_csv("test.csv", sep=";", usecols=["rHsubLS", "b", "lowerSetpoint"])
    test = data.loc[:, ['rHsubLS', 'b']]
    target = data.loc[:, 'lowerSetpoint']
    print(test.shape)
    print(target.shape)
    return test.astype(float), target.astype(float)

def split_data(test, target):
    X_train, X_test, y_train, y_test = train_test_split(test, target)
    X_train = np.array(X_train)
    X_test = np.array(X_test)
    y_train = np.array(y_train)
    y_test = np.array(y_test)
    stdsc1 = StandardScaler()
    train_data_std = stdsc1.fit_transform(X_train)
    test_data_std = stdsc1.fit_transform(X_test)
    y_train_1 = np.reshape(y_train, (-1, 1))
    y_test_1 = np.reshape(y_test, (-1, 1))
    train_target_std = stdsc1.fit_transform(y_train_1)
    test_target_std = stdsc1.fit_transform(y_test_1)
    return train_data_std, test_data_std, train_target_std, test_target_std

# Network creation
def create_NN(optimizer='rmsprop', init='glorot_uniform', alpha=0.15, activation_func='tanh'):
    NN_model = Sequential()
    # input layer
    NN_model.add(Dense(128, kernel_initializer=init, input_dim=2, activation=activation_func))
    # hidden layers
    NN_model.add(LeakyReLU(alpha=alpha))
    NN_model.add(Dense(256, kernel_initializer=init, activation='relu'))
    # output layer
    NN_model.add(Dense(1, kernel_initializer=init, activation='linear'))
    NN_model.compile(loss='mean_squared_error', optimizer=optimizer, metrics=["mse", "mape"])
    NN_model.summary()
    return NN_model

# GridSearchCV
def train_NN(NN_model, train_data, train_target):
    seed = 4
    np.random.seed(seed)
    model = KerasRegressor(build_fn=create_NN, verbose=1)
    optimizers = ['rmsprop', 'adam', 'SGD']
    inits = ['glorot_uniform', 'normal', 'uniform', 'he_uniform']
    activation_funcs = ['tanh', 'relu', 'softmax']
    epochs = [50, 100, 150]
    batches = [50, 100, 500]
    alphas = [0.15, 0.45, 0.3]
    grid_parameter = dict(optimizer=optimizers, epochs=epochs, batch_size=batches, init=inits, alpha=alphas, activation_func=activation_funcs)  # , dropout_rate=dropout
    if __name__ == '__main__':
        grid = GridSearchCV(estimator=model, scoring='neg_mean_squared_error', param_grid=grid_parameter, verbose=1, cv=3)
        grid_results = grid.fit(train_data, train_target, use_multiprocessing=True, shuffle=True, workers=8)
        print("Best: %f using %s" % (grid_results.best_score_, grid_results.best_params_))
        means = grid_results.cv_results_['mean_test_score']
        stds = grid_results.cv_results_['std_test_score']
        params = grid_results.cv_results_['params']
        for mean, stdev, param in zip(means, stds, params):
            print("%f (%f) with: %r" % (mean, stdev, param))

try:
    test, target = get_data()
    train_data, test_data, train_target, test_target = split_data(test, target)
    print("Data split\n")
    NN_model = create_NN()
    train_NN(NN_model, train_data, train_target)
except (KeyboardInterrupt, SystemExit):
    raise
The results of the GridSearch:
Best: -0.000064 using {'activation_func': 'relu', 'alpha': 0.3, 'batch_size': 50, 'epochs': 150, 'init': 'he_uniform', 'optimizer': 'adam'}
When I try to reproduce this network with this code:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from time import time
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from tensorflow import keras
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, Dropout, PReLU, LeakyReLU
from keras.utils import plot_model
from keras.optimizers import SGD
from keras.losses import MeanAbsolutePercentageError

def get_data():
    data = pd.read_csv("test.csv", sep=";", usecols=["rHsubLS", "b", "lowerSetpoint"])
    test = data.loc[:, ['rHsubLS', 'b']]
    target = data.loc[:, 'lowerSetpoint']
    print(test.shape)
    print(target.shape)
    return test.astype(float), target.astype(float)

def split_data(test, target):
    X_train, X_test, y_train, y_test = train_test_split(test, target)
    X_train = np.array(X_train)
    X_test = np.array(X_test)
    y_train = np.array(y_train)
    y_test = np.array(y_test)
    stdsc1 = StandardScaler()
    train_data_std = stdsc1.fit_transform(X_train)
    test_data_std = stdsc1.fit_transform(X_test)
    y_train_1 = np.reshape(y_train, (-1, 1))
    y_test_1 = np.reshape(y_test, (-1, 1))
    train_target_std = stdsc1.fit_transform(y_train_1)
    test_target_std = stdsc1.fit_transform(y_test_1)
    return train_data_std, test_data_std, train_target_std, test_target_std

def create_NN():
    NN_model = Sequential()
    # input layer
    NN_model.add(Dense(128, input_dim=2, kernel_initializer='he_uniform', activation='relu'))
    # hidden layers
    NN_model.add(LeakyReLU(0.3))
    NN_model.add(Dense(256, kernel_initializer='he_uniform', activation='relu'))
    # output layer
    NN_model.add(Dense(1, activation='linear'))
    keras.backend.set_epsilon(1)
    NN_model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse', 'mape'])
    NN_model.summary()
    return NN_model

def train_NN(NN_model, train_data, train_target, test_data, test_target):
    history = NN_model.fit(train_data, train_target, epochs=150, shuffle=True, batch_size=50, verbose=1, use_multiprocessing=True)
    return history

def test_NN(NN_model, test_data, test_target, train_data, train_target):
    mean_test = NN_model.evaluate(test_data, test_target, verbose=1)
    mean_train = NN_model.evaluate(train_data, train_target, verbose=1)
    return mean_test, mean_train

try:
    seed = 4
    np.random.seed(seed)
    test, target = get_data()
    train_data, test_data, train_target, test_target = split_data(test, target)
    print("Data split\n")
    NN_model = create_NN()
    print("Neural Network created\n")
    history = train_NN(NN_model, train_data, train_target, test_data, test_target)
    mean_test, mean_train = test_NN(NN_model, test_data, test_target, train_data, train_target)
    print("Durchschnittliche Abweichung Training: ", mean_train)
    print("Durchschnittliche Abweichung Test: ", mean_test)
    print(NN_model.metrics_names)
    NN_model.save('Regelung_v1.h5')
    print("Neural Network saved")
except (KeyboardInterrupt, SystemExit):
    raise
I get this result:
mse loss on training data: 0.028168134637475015
mse loss on test data: 0.028960488473176955
The mean absolute percentage error is about 9%. This result is not what I expected.
Where is my mistake?
Thank you for your help in advance.
Have a nice day!
PC Specs:
Intel i5 4570
16GB RAM + 16 GB page file
Nvidia GTX 1070
3 TB HDD
Software:
Windows 10
Geforce Game ready driver 451.48
Tensorflow 2.2.0
Keras 2.3.1
Sklearn 0.23.1
Cuda 10.1
Python 3.7.7
Edit: Here are a few lines of test.csv:
TIMESTAMP;rHsubLS;b;lowerSetpoint
20200714091423000.00000000000;2.28878288783;-0.74361743617;-0.27947195702
20200714091423000.00000000000;0.13274132741;-0.94552945529;-0.32351276857
20200714091423000.00000000000;1.85753857539;0.77844778448;0.22244954249
20200714091423000.00000000000;1.31896318963;0.44518445184;0.33573301999
20200714091423000.00000000000;2.55885558856;-0.77792777928;-0.28837806344
The output layer was missing its kernel initializer:
NN_model.add(Dense(1, kernel_initializer='he_uniform', activation='linear'))
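A separate detail visible in split_data (an observation on the posted code, so treat it as a hedged suggestion): fit_transform is called on the test split as well, so train and test are scaled with different parameters, and the same scaler instance is then re-fitted on the targets. The usual pattern is to fit on the training data only and reuse the learned scaling:

stdsc_x = StandardScaler()
train_data_std = stdsc_x.fit_transform(X_train)   # fit on training features only
test_data_std = stdsc_x.transform(X_test)         # reuse the same mean/std

stdsc_y = StandardScaler()                        # separate scaler for the target
train_target_std = stdsc_y.fit_transform(y_train.reshape(-1, 1))
test_target_std = stdsc_y.transform(y_test.reshape(-1, 1))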

Prediction using Keras NLP

I am a beginner in the field of neural networks.
I am trying to implement an LSTM model for predicting the secondary structure of a protein from a given primary sequence. The base kernel on which my program is based can be found on Kaggle: https://www.kaggle.com/helmehelmuto/secondary-structure-prediction-with-keras
I successfully trained the model, saved it to a pickle file, and I am able to use the pickle file to load weights into my model and make predictions as well. However, these predictions are on the test set that was created by the train_test_split function from scikit-learn.
I aim to feed in a string containing the sequence of a protein and get its secondary structure prediction.
The code uses Tokenizer to convert the protein sequences from the dataset into a numpy.ndarray, which is used for making predictions.
Where I am stuck is taking a string as input (some protein sequence), converting it into the same kind of object, and then making predictions on it.
I have tried using the same method the Kaggle kernel author used for converting the data from the .csv file to a numpy.ndarray, but I get an error: 'numpy.ndarray' object has no attribute 'lower'.
I would be grateful if someone could guide me here on converting strings to the objects which are used for prediction in the code.
Code (works fine; predictions are made directly on the test set generated by train_test_split):
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle

df = pd.read_csv(r'C:\Users\Viktor\Desktop\2018-06-06-ss.cleaned.csv')
df.len.hist(bins=100)
print(df.shape)

def seq2ngrams(seqs, n=3):
    return np.array([[seq[i:i+n] for i in range(len(seq))] for seq in seqs])

maxlen_seq = 128
input_seqs, target_seqs = df[['seq', 'sst3']][(df.len <= maxlen_seq) & (~df.has_nonstd_aa)].values.T
input_grams = seq2ngrams(input_seqs)
print(len(input_seqs))

from keras.preprocessing import text, sequence
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical

tokenizer_encoder = Tokenizer()
tokenizer_encoder.fit_on_texts(input_grams)
input_data = tokenizer_encoder.texts_to_sequences(input_grams)
input_data = sequence.pad_sequences(input_data, maxlen=maxlen_seq, padding='post')

tokenizer_decoder = Tokenizer(char_level=True)
tokenizer_decoder.fit_on_texts(target_seqs)
target_data = tokenizer_decoder.texts_to_sequences(target_seqs)
target_data = sequence.pad_sequences(target_data, maxlen=maxlen_seq, padding='post')
target_data = to_categorical(target_data)
input_data.shape, target_data.shape

from keras.models import Model, Input
from keras.layers import LSTM, Embedding, Dense, TimeDistributed, Bidirectional

n_words = len(tokenizer_encoder.word_index) + 1
n_tags = len(tokenizer_decoder.word_index) + 1
print(n_words, n_tags)

input = Input(shape=(maxlen_seq,))
x = Embedding(input_dim=n_words, output_dim=128, input_length=maxlen_seq)(input)
x = Bidirectional(LSTM(units=64, return_sequences=True, recurrent_dropout=0.1))(x)
y = TimeDistributed(Dense(n_tags, activation="softmax"))(x)
model = Model(input, y)
model.summary()

from sklearn.model_selection import train_test_split
from keras.metrics import categorical_accuracy
from keras import backend as K
import tensorflow as tf

def q3_acc(y_true, y_pred):
    y = tf.argmax(y_true, axis=-1)
    y_ = tf.argmax(y_pred, axis=-1)
    mask = tf.greater(y, 0)
    return K.cast(K.equal(tf.boolean_mask(y, mask), tf.boolean_mask(y_, mask)), K.floatx())

model.compile(optimizer="rmsprop", loss="categorical_crossentropy", metrics=["accuracy", q3_acc])

X_train, X_test, y_train, y_test = train_test_split(input_data, target_data, test_size=.4, random_state=0)
seq_train, seq_test, target_train, target_test = train_test_split(input_seqs, target_seqs, test_size=.4, random_state=0)
#model.fit(X_train, y_train, batch_size=128, epochs=5, validation_data=(X_test, y_test), verbose=1)

def onehot_to_seq(oh_seq, index):
    s = ''
    for o in oh_seq:
        i = np.argmax(o)
        if i != 0:
            s += index[i]
        else:
            break
    return s

def plot_results(x, y, y_):
    print("---")
    print("Input: " + str(x))
    print("Target: " + str(onehot_to_seq(y, revsere_decoder_index).upper()))
    print("Result: " + str(onehot_to_seq(y_, revsere_decoder_index).upper()))
    fig = plt.figure(figsize=(10, 2))
    plt.imshow(y.T, cmap='Blues')
    plt.imshow(y_.T, cmap='Reds', alpha=.5)
    plt.yticks(range(4), [' '] + [revsere_decoder_index[i+1].upper() for i in range(3)])
    plt.show()

revsere_decoder_index = {value: key for key, value in tokenizer_decoder.word_index.items()}
revsere_encoder_index = {value: key for key, value in tokenizer_encoder.word_index.items()}

#N = 3
#y_train_pred = model.predict(X_train[:N])
#y_test_pred = model.predict(X_test[:N])
#print('training')
#for i in range(N):
#    plot_results(seq_train[i], y_train[i], y_train_pred[i])
#print('testing')
#for i in range(N):
#    plot_results(seq_test[i], y_test[i], y_test_pred[i])

loaded_model = pickle.load(open("save.p", "rb"))
N = 3
y_train_pred = loaded_model.predict(X_train[:N])
y_test_pred = loaded_model.predict(X_test[:N])
print('training')
for i in range(N):
    plot_results(seq_train[i], y_train[i], y_train_pred[i])
print('testing')
for i in range(N):
    plot_results(seq_test[i], y_test[i], y_test_pred[i])
#print(type(target_seqs))
CODE WHICH DOES NOT WORK AS EXPECTED:
Here, 'xf' is the CSV file containing the sequences for which I need predictions.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle

df = pd.read_csv(r'C:\Users\Viktor\Desktop\2018-06-06-ss.cleaned.csv')
xf = pd.read_csv(r'C:\Users\Viktor\Desktop\sequence.csv')
df.len.hist(bins=100)
print(df.shape)

def seq2ngrams(seqs, n=3):
    return np.array([[seq[i:i+n] for i in range(len(seq))] for seq in seqs])

maxlen_seq = 128
input_seqs, target_seqs = df[['seq', 'sst3']][(df.len <= maxlen_seq) & (~df.has_nonstd_aa)].values.T
input_grams = seq2ngrams(input_seqs)
print(len(input_seqs))

from keras.preprocessing import text, sequence
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical

tokenizer_encoder = Tokenizer()
tokenizer_encoder.fit_on_texts(input_grams)
input_data = tokenizer_encoder.texts_to_sequences(input_grams)
input_data = sequence.pad_sequences(input_data, maxlen=maxlen_seq, padding='post')

tokenizer_decoder = Tokenizer(char_level=True)
tokenizer_decoder.fit_on_texts(target_seqs)
target_data = tokenizer_decoder.texts_to_sequences(target_seqs)
target_data = sequence.pad_sequences(target_data, maxlen=maxlen_seq, padding='post')
target_data = to_categorical(target_data)
input_data.shape, target_data.shape

from keras.models import Model, Input
from keras.layers import LSTM, Embedding, Dense, TimeDistributed, Bidirectional

n_words = len(tokenizer_encoder.word_index) + 1
n_tags = len(tokenizer_decoder.word_index) + 1
print(n_words, n_tags)

input = Input(shape=(maxlen_seq,))
x = Embedding(input_dim=n_words, output_dim=128, input_length=maxlen_seq)(input)
x = Bidirectional(LSTM(units=64, return_sequences=True, recurrent_dropout=0.1))(x)
y = TimeDistributed(Dense(n_tags, activation="softmax"))(x)
model = Model(input, y)
model.summary()

from sklearn.model_selection import train_test_split
from keras.metrics import categorical_accuracy
from keras import backend as K
import tensorflow as tf

model.compile(optimizer="rmsprop", loss="categorical_crossentropy")

X_train, X_test, y_train, y_test = train_test_split(input_data, target_data, test_size=.4, random_state=0)
seq_train, seq_test, target_train, target_test = train_test_split(input_seqs, target_seqs, test_size=.4, random_state=0)
#model.fit(X_train, y_train, batch_size=128, epochs=1, validation_data=(X_test, y_test), verbose=1)

def onehot_to_seq(oh_seq, index):
    s = ''
    for o in oh_seq:
        i = np.argmax(o)
        if i != 0:
            s += index[i]
        else:
            break
    return s

def plot_results(x, y, y_):
    print("---")
    print("Input: " + str(x))
    print("Target: " + str(onehot_to_seq(y, revsere_decoder_index).upper()))
    print("Result: " + str(onehot_to_seq(y_, revsere_decoder_index).upper()))
    fig = plt.figure(figsize=(10, 2))
    plt.imshow(y.T, cmap='Blues')
    plt.imshow(y_.T, cmap='Reds', alpha=.5)
    plt.yticks(range(4), [' '] + [revsere_decoder_index[i+1].upper() for i in range(3)])
    plt.show()

revsere_decoder_index = {value: key for key, value in tokenizer_decoder.word_index.items()}
revsere_encoder_index = {value: key for key, value in tokenizer_encoder.word_index.items()}

N = 3
y_train_pred = model.predict(X_train[:N])
y_test_pred = model.predict(X_test[:N])
print('training')
for i in range(N):
    plot_results(seq_train[i], y_train[i], y_train_pred[i])
print('testing')
for i in range(N):
    plot_results(seq_test[i], y_test[i], y_test_pred[i])

loaded_model = pickle.load(open("save.p", "rb"))
N = 3
y_train_pred = loaded_model.predict(X_train[:N])
y_test_pred = loaded_model.predict(X_test[:N])
print('training')
for i in range(N):
    plot_results(seq_train[i], y_train[i], y_train_pred[i])
print('testing')
for i in range(N):
    plot_results(seq_test[i], y_test[i], y_test_pred[i])

print("-----")
print(X_test[:3])
print("-----")

xf.len.hist(bins=100)
input_seqs1, target_seqs1 = xf[['seq', 'sst3']][(xf.len <= maxlen_seq) & (~xf.has_nonstd_aa)].values.T
input_grams1 = seq2ngrams(input_seqs1)
tokenizer_encoder1 = Tokenizer()
tokenizer_encoder1.fit_on_texts(input_grams1)
input_data1 = tokenizer_encoder1.texts_to_sequences(input_grams1)
input_data1 = sequence.pad_sequences(input_data1, maxlen=maxlen_seq, padding='post')
tokenizer_decoder1 = Tokenizer(char_level=True)
tokenizer_decoder1.fit_on_texts(target_seqs1)
target_data1 = tokenizer_decoder1.texts_to_sequences(target_seqs1)
target_data1 = sequence.pad_sequences(target_data1, maxlen=maxlen_seq, padding='post')
target_data1 = to_categorical(target_data1)
input_data1.shape, target_data1.shape

X_train, X_test, y_train, y_test = train_test_split(input_data1, target_data1, test_size=1, random_state=0)
seq_train, seq_test, target_train, target_test = train_test_split(input_seqs1, target_seqs1, test_size=1, random_state=0)
y_train_pred1 = loaded_model.predict(X_train)
y_test_pred1 = loaded_model.predict(X_test)
plot_results(seq_train, y_train, y_train_pred)
plot_results(seq_test, y_test, y_test_pred)

#print(input_data1[0])
##y_train_pred = loaded_model.predict(input_data)
#y_test_pred1 = loaded_model.predict(input_data1[0])
##plot_results(seq_train, y_train, y_train_pred)
#plot_results(input_seqs1, target_data1, y_test_pred1)
TRACEBACK:
Traceback (most recent call last):
  File "<ipython-input-38-e8f27dda0841>", line 1, in <module>
    runfile('C:/Users/Viktor/Desktop/rost_nocl.py', wdir='C:/Users/Viktor/Desktop')
  File "D:\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
    execfile(filename, namespace)
  File "D:\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
    exec(compile(f.read(), filename, 'exec'), namespace)
  File "C:/Users/Viktor/Desktop/rost_nocl.py", line 116, in <module>
    tokenizer_encoder1.fit_on_texts(input_grams1)
  File "D:\Anaconda3\lib\site-packages\keras_preprocessing\text.py", line 223, in fit_on_texts
    self.split)
  File "D:\Anaconda3\lib\site-packages\keras_preprocessing\text.py", line 43, in text_to_word_sequence
    text = text.lower()
AttributeError: 'numpy.ndarray' object has no attribute 'lower'
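A sketch of one way to go from a raw sequence string to a prediction, reusing the tokenizer_encoder fitted on the training data instead of fitting a new tokenizer on xf (a fresh tokenizer would also assign different integer ids than the ones the model was trained on). The n-grams are kept as a plain Python list of token lists: the Tokenizer treats a list as pre-split tokens, which sidesteps the .lower() call that fails on ndarray rows. The example sequence is arbitrary:

def predict_secondary_structure(seq, model, tokenizer_encoder, revsere_decoder_index,
                                maxlen_seq=128, n=3):
    # Same overlapping n-grams as seq2ngrams, but as a list, not an ndarray
    grams = [[seq[i:i+n] for i in range(len(seq))]]
    ids = tokenizer_encoder.texts_to_sequences(grams)
    padded = sequence.pad_sequences(ids, maxlen=maxlen_seq, padding='post')
    pred = model.predict(padded)
    # Decode the per-position softmax back into a Q3 string
    return onehot_to_seq(pred[0], revsere_decoder_index).upper()

print(predict_secondary_structure('MKTAYIAKQRQISFVKSHFSRQLEERLGLIE',
                                  loaded_model, tokenizer_encoder, revsere_decoder_index))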

Keras 2.2.4 fit_generator problem. ValueError: problem inputting values to the input layer

I am running a Keras multi_gpu model. My model takes two inputs: one input is given by the ImageDataGenerator and the other is generated through a function inside the model. Please have a look at the following code:
import numpy as np
import keras
from keras.layers.convolutional import Conv2D
from keras.layers import ReLU, MaxPooling2D, ZeroPadding2D, BatchNormalization, Dense, Dropout, Activation, Flatten, Lambda, Concatenate, Add
from keras.models import Model
from keras.layers import Input
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model
from keras import backend as K
from keras_preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint
from keras.models import model_from_json
from keras.utils import multi_gpu_model
import pandas as pd
import os
import sys
from tqdm import *
# import skimage
import matplotlib.pyplot as plt
# %matplotlib inline
import cv2
import tensorflow as tf
import multiprocessing
# import pydot

########### Make log directory #####################################
cwd = os.getcwd()
log_dir = cwd + '/log_dir/Relation_net_logs'
if not os.path.exists(log_dir):
    os.makedirs(log_dir)
tensorboard_logsdir = log_dir + "/tensorboard_logdir"
if not os.path.exists(tensorboard_logsdir):
    os.makedirs(tensorboard_logsdir)

######### Make network ##############################################
def ConvolutionNetworks(kernel_size=3, stride_size=2):
    def conv(model):
        model = Conv2D(24, (9, 9), strides=(stride_size, stride_size), activation='relu', input_shape=(100, 100, 3), data_format='channels_last')(model)
        model = BatchNormalization()(model)
        model = Conv2D(24, (7, 7), strides=(stride_size, stride_size), activation='relu')(model)
        model = BatchNormalization()(model)
        model = Conv2D(24, (kernel_size, kernel_size), strides=(stride_size, stride_size), activation='relu')(model)
        model = BatchNormalization()(model)
        model = Conv2D(24, (5, 5), strides=(1, 1), activation='relu')(model)
        model = BatchNormalization()(model)
        return model
    return conv

######### Compute relations #######
def compute_relations(objects):
    def get_top_dim_1(t):
        return t[:, 0, :, :]

    def get_all_but_top_dim_1(t):
        return t[:, 1:, :, :]

    def get_top_dim_2(t):
        return t[:, 0, :]

    def get_all_but_top_dim2(t):
        return t[:, 1:, :]

    slice_top_dim_1 = Lambda(get_top_dim_1)
    slice_all_but_top_dim_1 = Lambda(get_all_but_top_dim_1)
    slice_top_dim_2 = Lambda(get_top_dim_2)
    slice_all_but_top_dim2 = Lambda(get_all_but_top_dim2)

    d = K.int_shape(objects)[2]
    features = []
    for i in range(d):  # This loop extracts the top layer of the feature map
        features1 = slice_top_dim_1(objects)
        objects = slice_all_but_top_dim_1(objects)
        for j in range(d):  # This loop extracts each object from the "top layer" taken in the previous loop and appends it to "features"
            features2 = slice_top_dim_2(features1)
            features1 = slice_all_but_top_dim2(features1)
            features.append(features2)

    relations = []
    concat = Concatenate()
    for feature1 in features:
        for feature2 in features:
            relations.append(concat([feature1, feature2]))
    return relations

############## f_theta ############################
def f_theta():
    def f(model):
        model = Dense(256, activation='relu')(model)
        # model = Activation('relu')(model)
        model = Dense(256, activation='relu')(model)
        # model = Activation('relu')(model)
        # model = Dropout(0.5)(model)
        model = Dense(256, activation='relu')(model)
        # model = Activation('relu')(model)
        model = Dense(256, activation='relu')(model)
        # model = Activation('relu')(model)
        return model
    return f

################# Relation module and tag building #########################################
from keras.utils import plot_model

def g_th(layers):
    def f(model):
        for n in range(len(layers)):
            model = layers[n](model)
        return model
    return f

def stack_layer(layers):
    def f(x):
        for k in range(len(layers)):
            x = layers[k](x)
        return x
    return f

def g_theta(h_unit=256, layers=4):
    r = []
    for k in range(layers):
        r.append(Dense(h_unit))
        r.append(Activation('relu'))
    return g_th(r)

def get_MLP():
    return g_th()

def RelationNetworks(objects):
    g_t = g_theta()
    relations = compute_relations(objects)
    print("length of relations={}".format(len(relations)))
    g_all = []
    for r in tqdm(relations):
        g_all.append(g_t(r))  # send each relation through g_t and append to a list for easy summation
    print("relation computed")
    combined_relation = Add()(g_all)
    print("relation combined")
    f_out = f_theta()(combined_relation)
    print("relation went through f_theta")
    return f_out

def build_tag(conv):
    d = K.int_shape(conv)[2]
    tag = np.zeros((d, d, 2))
    print("tagging in process")
    for i in range(d):
        for j in range(d):
            tag[i, j, 0] = float(int(i % d)) / (d - 1) * 2 - 1
            tag[i, j, 1] = float(int(j % d)) / (d - 1) * 2 - 1
    tag = K.variable(tag)
    tag = K.expand_dims(tag, axis=0)
    batch_size = K.shape(conv)[0]
    tag = K.tile(tag, [batch_size, 1, 1, 1])
    print("tagging done")
    return Input(tensor=tag)

################################# Build model ###################################################################################
visual_scene = Input((100, 100, 3))
# visual_question = Input((11,))
visual_conv = ConvolutionNetworks()(visual_scene)
tag = build_tag(visual_conv)
visual_conv = Concatenate()([tag, visual_conv])
visual_RN = RelationNetworks(visual_conv)
visual_out = Dense(4, activation='softmax')(visual_RN)
VisualModel = Model(inputs=[tag, visual_scene], outputs=visual_out)
print("model made")
# plot_model(VisualModel, to_file='/home/aakash/Relation_Network/figures/VisualModel1.png')

################################ Create parallel model ###############
# This executes data parallelism: each batch is divided equally across all GPUs for computation
try:
    parallel_model = multi_gpu_model(VisualModel, cpu_merge=True, cpu_relocation=True, gpus=2)
    print("Training using multiple GPUs..")
except:
    parallel_model = model
    print("Training using single GPU or CPU..")

################################# Training #################################################################################
workers = multiprocessing.cpu_count() - 1
batchsize = 32
IMG_SIZE = 100
train_df_path = "/home/aakash/Relation_Network/training_df.pkl"
valid_df_path = "/home/aakash/Relation_Network/validation_df.pkl"
image_dir = "/home/aakash/Relation_Network/DL_Dataset"

from keras.optimizers import Adam
lr = 1e-4
adam = Adam(lr=lr)
parallel_model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

# Save architecture
NAME = "2_conv_model"
with open(NAME + ".json", "w") as json_file:
    json_file.write(VisualModel.to_json())
print("model architecture saved as json file")

# Create callbacks
# NAME = "{}-conv-{}-nodes-{}-dense-{}".format(conv_layer, layer_size, dense_layer, int(time.time()))
checkpoint = keras.callbacks.ModelCheckpoint(log_dir + '/' + NAME + '.h5', monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=True, mode='auto', period=1)
csv_logger = keras.callbacks.CSVLogger(log_dir + "/" + NAME + ".csv", separator=',', append=False)
tensorboard = keras.callbacks.TensorBoard(log_dir=tensorboard_logsdir + '/' + NAME, histogram_freq=0, batch_size=batchsize, write_graph=True, write_grads=False, write_images=False, embeddings_freq=0,
                                          embeddings_layer_names=None, embeddings_metadata=None, embeddings_data=None, update_freq='epoch')

training_df = pd.read_pickle(train_df_path)
validation_df = pd.read_pickle(valid_df_path)

datagen = ImageDataGenerator(rescale=1./255)
train_generator = datagen.flow_from_dataframe(dataframe=training_df, directory=image_dir,
                                              x_col="image", y_col="lesion", class_mode="categorical",
                                              target_size=(IMG_SIZE, IMG_SIZE), batch_size=batchsize, shuffle=True)
validation_generator = datagen.flow_from_dataframe(dataframe=validation_df, directory=image_dir,
                                                   x_col="image", y_col="lesion", class_mode="categorical",
                                                   target_size=(IMG_SIZE, IMG_SIZE), batch_size=batchsize)

parallel_model.fit_generator(generator=train_generator,
                             steps_per_epoch=(training_df.shape[0]) // batchsize,
                             validation_data=validation_generator,
                             validation_steps=(validation_df.shape[0]) // batchsize,
                             epochs=30, verbose=1, callbacks=[checkpoint, csv_logger, tensorboard],
                             use_multiprocessing=True, workers=workers)
The build_tag function returns an Input layer built from a tensor (this is my second input).
But when I run this code, it shows the following error (screenshot): https://drive.google.com/file/d/1gGjoO89zwRw_zUQ14sUIrdC7oRKrdVT1/view?usp=sharing
I made the build_tag function a Lambda layer, so the value returned by build_tag is just the tensor "tag" and NOT an Input layer, removed the "tag" input from the model, and it started to work.
(The model architecture was plotted before and after converting build_tag into a Lambda layer.)
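A sketch of that rework, reconstructed from the description above rather than from the original post, so treat the details as assumptions. build_tag is reduced to a plain tensor-producing helper and the concatenation happens inside a Lambda layer, leaving the model with a single image input that matches what the ImageDataGenerator yields:

def build_tag_tensor(conv):
    # Same coordinate tag as before, but returned as a backend tensor
    # instead of being wrapped in an Input layer
    d = K.int_shape(conv)[2]
    tag = np.zeros((d, d, 2))
    for i in range(d):
        for j in range(d):
            tag[i, j, 0] = float(i) / (d - 1) * 2 - 1
            tag[i, j, 1] = float(j) / (d - 1) * 2 - 1
    tag = K.expand_dims(K.constant(tag), axis=0)
    batch_size = K.shape(conv)[0]
    return K.tile(tag, [batch_size, 1, 1, 1])

visual_scene = Input((100, 100, 3))
visual_conv = ConvolutionNetworks()(visual_scene)
# Concatenate the tag with the feature map inside the graph
tagged = Lambda(lambda t: K.concatenate([build_tag_tensor(t), t], axis=-1))(visual_conv)
visual_RN = RelationNetworks(tagged)
visual_out = Dense(4, activation='softmax')(visual_RN)
VisualModel = Model(inputs=visual_scene, outputs=visual_out)

With a single input, fit_generator can consume the ImageDataGenerator output directly, which matches the observation that the model starts to work once the extra "tag" input is removed.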

Keras model.fit() showing loss as nan

I am trying to train my model for instrument detection. The output shows loss: nan from the first epoch. I tried changing the loss function and the activation function, and adding some regularisation like Dropout, but it didn't affect the result.
Here is the code:
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, Dropout
from keras.optimizers import Adam
import pickle
import os
import numpy as np
from sklearn.model_selection import train_test_split

def one_hot_encoding(target):
    Instruments = ['vio', 'pia', 'tru', 'flu']
    enc_tar = np.zeros([len(target), 4])
    for i in range(len(target)):
        enc_tar[i][Instruments.index(target[i])] = 1
    return enc_tar

def create_model_cnn(inp_shape):
    classifier = Sequential()
    classifier.add(Conv2D(25, kernel_size=3, activation='relu', input_shape=inp_shape))
    classifier.add(Conv2D(10, kernel_size=3, activation='relu'))
    classifier.add(Flatten())
    classifier.add(Dense(4, activation='softmax'))
    adam = Adam(0.001)
    classifier.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
    return classifier

def create_model_mlp(inp_shape):
    classifier = Sequential()
    classifier.add(Dense(22, activation='softsign', input_shape=(42,)))
    classifier.add(Dropout(0.25))
    classifier.add(Dense(10, activation='softsign'))
    classifier.add(Dropout(0.25))
    classifier.add(Dense(4, activation='softmax'))
    adam = Adam(0.0001)
    classifier.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
    return classifier

def get_weights(classifier):
    return classifier.get_weights()

def set_weights(classifier, weights):
    classifier.set_weights(weights)
    return classifier

def train_model(classifier, data, target, epoch=40):
    classifier.fit(data, target, epochs=epoch, validation_split=0.4, batch_size=32, verbose=1)
    return classifier

def predict(classifier, data):
    return classifier.predict(data)

if __name__ == '__main__':
    # Get the data and the target
    [data, target] = pickle.load(open('../input/music-features/feat_targ.pickle', 'rb'))
    # if 'model.pickle' not in os.listdir():
    # Generate the classifiers
    cnn_classifier = create_model_cnn((6, 7, 1))
    mlp_classifier = create_model_mlp((42))
    # else:
    #     # Load the existing model (from a pickle dump)
    #     classifier = pickle.load(open('model.pickle', 'rb'))
    tr_data, tst_data, tr_target, tst_target = train_test_split(data, target)
    tr_data_lin = np.array(tr_data)
    tr_data = tr_data_lin.reshape((tr_data_lin.shape[0], 6, 7, 1))
    tst_data_lin = np.array(tst_data)
    tst_data = tst_data_lin.reshape((tst_data_lin.shape[0], 6, 7, 1))
    enc_target = one_hot_encoding(tr_target)
    # print(tr_data, enc_target)
    # train_model(cnn_classifier, tr_data, enc_target)
    train_model(mlp_classifier, tr_data_lin, enc_target)
    # pickle.dump([cnn_classifier, mlp_classifier], open('model.pickle', 'wb'))
The training and test data come from the pickle file; the data shape is (15000, 42).
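Since the MLP itself looks reasonable, a common culprit for loss: nan from the very first epoch is bad values in the inputs rather than the architecture. A quick hedged check worth running on the unpickled arrays before training:

import numpy as np

X = np.asarray(data, dtype=np.float64)
print("NaNs in data:", np.isnan(X).any())
print("Infs in data:", np.isinf(X).any())
print("value range:", X.min(), X.max())

# If the feature scales are large or wildly different, standardizing
# the inputs often stops the loss from diverging to nan:
X = (X - X.mean(axis=0)) / (X.std(axis=0) + 1e-8)

If NaNs show up, np.nan_to_num(X) or dropping the offending rows before train_test_split would be the next step.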
