I'm just trying to explore Keras and TensorFlow with the famous MNIST dataset.
I have already applied some basic neural networks, but when it comes to tuning hyperparameters, especially the number of layers, via the sklearn wrapper GridSearchCV, I get the error below:
Parameter values for parameter (hidden_layers) need to be a sequence(but not a string) or np.ndarray.
So you can have a better view, I have posted the main parts of my code below.
Data preparation
# Extract the labels
X_train = train.drop(labels=["label"], axis=1, inplace=False)
Y_train = train['label']
del train
# Reshape to fit the MLP
X_train = X_train.values.reshape(X_train.shape[0], 784).astype('float32')
X_train = X_train / 255
# One-hot encode the labels
from keras.utils import to_categorical
Y_train = to_categorical(Y_train, num_classes=10)
num_classes = Y_train.shape[1]
Keras part
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

# Function with the hyperparameters to optimize
def create_model(optimizer='adam', activation='sigmoid', hidden_layers=2):
    # Initialize the constructor
    model = Sequential()
    # Add the input layer (note: input_shape must be a tuple such as (784,), not a bare int)
    model.add(Dense(32, activation=activation, input_shape=(784,)))
    # Add the hidden layers
    for i in range(hidden_layers):
        model.add(Dense(16, activation=activation))
    # Add the output layer
    model.add(Dense(num_classes, activation='softmax'))
    # Compile the model
    model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                  metrics=['accuracy'])
    return model

# Model which will be the input to the GridSearchCV function
modelCV = KerasClassifier(build_fn=create_model, verbose=0)
GridSearchCV
from keras.activations import relu, sigmoid

activations = [sigmoid, relu]
param_grid = dict(hidden_layers=3, activation=activations, batch_size=[256], epochs=[30])
grid = GridSearchCV(estimator=modelCV, param_grid=param_grid, scoring='accuracy')
grid_result = grid.fit(X_train, Y_train)
I just want to mention that the same kind of question has already been asked here: Grid Search the number of hidden layers with keras, but the answer is incomplete and I can't add a comment to reply to the answerer.
Thank you!
You should add:
for i in range(int(hidden_layers)):
    # Add one hidden layer
    model.add(Dense(16, activation=activation))
Try passing the values in param_grid as lists:
params_grid = {"hidden_layers": [3]}
When you set hidden_layers=3 in param_grid, GridSearchCV receives a bare int rather than a sequence of candidate values, which is why it throws the error. As the message says, each parameter's values must be a sequence (but not a string) or an np.ndarray.
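Putting the two answers together, a hedged sketch of a corrected grid: the essential change is wrapping every parameter's candidate values in a list, even when there is only one value to try (the candidate counts below are illustrative):
param_grid = dict(hidden_layers=[1, 2, 3],      # a list, so GridSearchCV gets a sequence
                  activation=['sigmoid', 'relu'],
                  batch_size=[256],
                  epochs=[30])
grid = GridSearchCV(estimator=modelCV, param_grid=param_grid, scoring='accuracy')
grid_result = grid.fit(X_train, Y_train)
print(grid_result.best_score_, grid_result.best_params_)
With that change, the int(hidden_layers) cast in the loop is no longer strictly necessary, since each candidate is already passed to create_model as an int.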
Related
I have a dataset of size 273985 x 5 that I'm training as a path prediction problem. I chose an LSTM inspired by this paper: https://ieeexplore.ieee.org/abstract/document/9225479
I have a baseline implementation as follows:
# lstm autoencoder recreate sequence
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import RepeatVector
from keras.callbacks import EarlyStopping
from keras.layers import TimeDistributed
from keras.utils import plot_model
# define input sequence
my_sequence = np.array(sample)
# reshape input into [samples, timesteps, features]
n_in = len(my_sequence)
my_sequence = my_sequence.reshape((1, n_in, 5))
# define model
model = Sequential()
model.add(LSTM(10, activation='sigmoid', input_shape=(n_in,5)))
model.add(RepeatVector(n_in))
model.add(LSTM(10, activation='sigmoid', return_sequences=True))
model.add(TimeDistributed(Dense(5)))
model.compile(optimizer='adam', loss='mse')
# fit model
model.fit(my_sequence, my_sequence, epochs=300, verbose=0)
# structure of the model and the layers
plot_model(model, show_shapes=True, to_file=path)
# demonstrate recreation
predicted = model.predict(my_sequence, verbose=0)
print(predicted)
print(my_sequence)
Right now I am choosing my training sample by hand, but I want to train on my entire dataset in a rolling fashion, much like bootstrapping: train on rows 1-50 and predict the next 50; train on rows 2-50 and predict the next 50… until the end of the test set, then compare my predictions against the actual values.
Would this be done via batching the data or k-fold validation? Also, how would one go about it, or calculate an appropriate evaluation metric?
Thank you!
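For what it's worth, the scheme described sounds like walk-forward (rolling-origin) evaluation rather than k-fold cross-validation, since row order matters in path prediction. Below is a rough sketch under the assumption that the model is trained to map one window to the following window (rather than pure reconstruction as above); data, window, horizon, and build_model are placeholder names, not from the original code, and the loop steps by the prediction horizon to keep it short (the one-row stride from the question works the same way):
import numpy as np

window, horizon = 50, 50                       # assumed sizes from the question
mse_per_step = []
for start in range(0, len(data) - window - horizon, horizon):
    x = data[start:start + window].reshape(1, window, 5)
    y = data[start + window:start + window + horizon].reshape(1, horizon, 5)
    model = build_model()                      # placeholder: returns a fresh model
    model.fit(x, y, epochs=300, verbose=0)     # learn window -> next window
    pred = model.predict(x, verbose=0)
    mse_per_step.append(np.mean((pred - y) ** 2))
print("mean walk-forward MSE:", np.mean(mse_per_step))
Since the targets are continuous coordinates, the mean squared error per predicted window is a natural evaluation metric here.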
I'm working on multilabel emotion analysis and built a sequential multilayer model, wrapped it with KerasClassifier, and tuned it with GridSearchCV. I use the score function to evaluate the system. I then tried to compare that score with the result of calling predict_proba followed by jaccard_score with average='micro' (to suit the multilabel setting). I expected to get the same results, but they differ; could anyone help me interpret this?
My code :
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Flatten, LSTM
from tensorflow.keras.layers import Activation, Dropout, Dense
from tensorflow.keras.layers import Input
# Use scikit-learn to grid search
import sklearn
import sklearn.metrics as metrics
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.metrics import SCORERS, jaccard_score
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
# Function to create the model, required for KerasClassifier
def create_model_multiclass(learn_rate, dropout_rate, activation):
    # Create the model
    model = tf.keras.Sequential()
    model.add(Dense(300, kernel_initializer='normal', activation=activation))
    model.add(Dropout(dropout_rate))
    model.add(Dense(150, kernel_initializer='normal', activation=activation))
    model.add(Dropout(dropout_rate))
    model.add(Dense(50, kernel_initializer='normal', activation=activation))
    model.add(Dropout(dropout_rate))
    model.add(Dense(11, activation=activation))
    # Compile the model
    adam = Adam(learning_rate=learn_rate)
    model.compile(loss='mse', optimizer=adam, metrics=['accuracy'])
    return model
### Loading train data
###################
# # create model
# # Declare parameter values
model = KerasClassifier(build_fn=create_model_multiclass, verbose=10)
# Define the parameters that you wish to use in your Grid Search along
# with the list of values that you wish to try out
learn_rate = [0.001]
dropout_rate = [0.5]
batch_size = [ 60 ]
epochs = [20,60]
activation = [ 'relu', 'sigmoid' ]
print(sorted(sklearn.metrics.SCORERS.keys()))
scoring={'jaccard':'jaccard_micro','F1_score': 'f1_micro'}
# Make a dictionary of the grid search parameters
param_grid =dict(learn_rate=learn_rate, dropout_rate=dropout_rate, batch_size=batch_size, epochs=epochs,activation=activation )
# Build and fit the GridSearchCV
print(sorted(sklearn.metrics.SCORERS.keys()))
print("Tuning hyperparameters for accuracy")
grid = GridSearchCV(estimator=model, param_grid=param_grid, return_train_score=True,
                    refit={'jaccard'}, cv=KFold(5), verbose=15)
grid_results = grid.fit(X_all, Y_all)
# Summarize the results in a readable format
print("Best: {0}, using {1}".format(grid_results.best_score_, grid_results.best_params_))
print(grid.best_params_)
print(grid.best_estimator_)
means = grid.cv_results_['mean_test_score']
stds = grid.cv_results_['std_test_score']
params = grid.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
print('{0} ({1}) with: {2}'.format(mean, stdev, param))
print("The scores are computed on the full evaluation set.")
print("score",grid.score(X_dev,Y_dev))
y_pred=grid.predict_proba(X_dev)
y_pred_int=y_pred.round()
micro_multilabel_accuracy=jaccard_score(Y_dev, y_pred_int,average='micro')
print('micro_multilabel_accuracy',micro_multilabel_accuracy)
and the output I got :
Best: 0.5518947958946228, using {'activation': 'relu', 'batch_size': 60, 'dropout_rate': 0.5, 'epochs': 20, 'learn_rate': 0.001}
{'activation': 'relu', 'batch_size': 60, 'dropout_rate': 0.5, 'epochs': 20, 'learn_rate': 0.001}
<tensorflow.python.keras.wrappers.scikit_learn.KerasClassifier object at 0x0000023885BBED30>
score 0.5350427627563477
micro_multilabel_accuracy 0.3984088127294982
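One likely source of the gap, offered as a hedged observation: the scoring dict above is defined but never passed to GridSearchCV, so grid.score falls back to the wrapped KerasClassifier's default accuracy rather than a Jaccard score, while the second number is a micro-averaged Jaccard score; the two calls are not measuring the same thing. A sketch of how to make both paths use the same scorer ('jaccard_micro' is a standard sklearn scorer string, visible in the SCORERS printout above):
grid = GridSearchCV(estimator=model, param_grid=param_grid,
                    scoring='jaccard_micro',   # one metric for CV, refit and .score
                    return_train_score=True, cv=KFold(5), verbose=15)
grid_results = grid.fit(X_all, Y_all)
print("jaccard via grid.score:", grid.score(X_dev, Y_dev))
Even then, small differences can remain, because the scorer calls predict internally while the manual path rounds the output of predict_proba.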
Here is my complete code. I'm trying to predict protein classes from protein sequences.
from sklearn.preprocessing import LabelBinarizer
# Transform labels to one-hot
lb = LabelBinarizer()
Y = lb.fit_transform(df.classification)
from keras.preprocessing import text, sequence
from keras.preprocessing.text import Tokenizer
from sklearn.model_selection import train_test_split
#maximum length of sequence, everything afterwards is discarded!
max_length = 500
#create and fit tokenizer
tokenizer = Tokenizer(char_level=True)
tokenizer.fit_on_texts(seqs)
X = tokenizer.texts_to_sequences(seqs)
X = sequence.pad_sequences(X, maxlen=max_length)
from __future__ import print_function
import numpy as np
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional, Conv1D
from keras.layers.convolutional import MaxPooling1D
import tensorflow as tf
from tensorflow.keras import layers
embedding_vector_length = 128
max_length = 500
model = Sequential()
model.add(Embedding(len(tokenizer.word_index)+1, embedding_vector_length, input_length=max_length))
model.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Bidirectional(LSTM(64)))
model.add(Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=.2)
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=512)
This is the accuracy of the model:
train-acc = 0.8485087800799034
test-acc = 0.8203392530062913
and my prediction results are:
[9.65313017e-02 1.33084046e-04 1.73516816e-03 4.62103529e-08
8.45071673e-03 2.42734270e-04 3.54182965e-04 2.88571493e-04
1.99087553e-05 8.92244339e-01]
[8.89207274e-02 1.99566261e-04 1.76228161e-04 2.08527595e-02
1.64435953e-01 2.83987029e-03 1.53038520e-02 7.07270563e-01
5.16798650e-07 2.19354401e-08]
[9.36142087e-01 6.09822795e-02 3.55492946e-09 2.19342492e-05
5.41335670e-04 1.89031591e-04 2.66434945e-04 1.84136129e-03
1.54582867e-05 3.31551647e-10]
Any help in this regard would be appreciated. I'm stuck and don't know how to solve it. Also, I'm kind of new to deep learning.
As you can see, your last layer uses a softmax activation function:
model.add(Dense(10, activation='softmax'))
So when you predict values, they pass through that softmax function in the last layer, which gives you those strange-looking float values.
Basically, what the softmax function does here is normalize its input values into the range (0, 1) so that all the components add up to 1. You can read more about the softmax function here: https://en.wikipedia.org/wiki/Softmax_function.
To find the predicted label id, you just need to find the index of the maximum value in each array; that index is the label id. You can use numpy's argmax function to find the index of the maximum value in multidimensional arrays: https://numpy.org/doc/stable/reference/generated/numpy.argmax.html
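For instance, applied to the predictions above (a small sketch; lb is the LabelBinarizer fitted earlier in the question, so it can also map the winning indices back to the original class names):
import numpy as np

probs = model.predict(X_test)               # each row sums to 1 after softmax
label_ids = np.argmax(probs, axis=1)        # index of the largest value per row
class_names = lb.inverse_transform(probs)   # recover the original class labels
print(label_ids[:3])
print(class_names[:3])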
Okay, so I'm pretty new to deep learning and have a very basic doubt. I have input data in an array containing 255 samples (array shape (255,)) in epochs_data and their corresponding labels in new_labels (array shape (255,)).
I split the data using the following code:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(epochs_data, new_labels, test_size = 0.2, random_state=30)
I'm using a sequential model:
from keras.models import Sequential
from keras import layers
from keras.layers import Dense, Activation, Flatten
model = Sequential()
I know how to code the hidden layers and the output layer:
model.add(Dense(500, activation='relu')) #Hidden Layer
model.add(Dense(2, activation='softmax')) #Output Layer
But I don't know how to code the input layer with its input_shape specified. X_train is the input; it's an array of shape (180,). Also, please tell me how to code model.fit() for the same. Any help is appreciated.
You have to add this line before the hidden layers. You can use any activation function you want. Note that this single line represents both the input layer and the first hidden layer (you have to choose the number of neurons; I put 100):
model.add(Dense(100, input_shape=(X_train.shape[1],)))
EDIT:
Before fitting your model, you have to configure it with this line:
model.compile(loss='mse', optimizer='Adam', metrics=['mse'])
So you have to choose a loss (in this case mean squared error), a metric, and an optimizer such as Adam, Adamax, etc.
Then you can fit your model, choosing the data (X, y), the number of epochs, the validation split, and the batch size:
history = model.fit(X_train, y_train, epochs=200,
                    validation_split=0.1, batch_size=250)
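Since the output layer in the question is a two-unit softmax, a cross-entropy loss would arguably be a more conventional choice than mse. A hedged end-to-end sketch, assuming X_train is 2-D (samples, features) and y_train is one-hot encoded with two columns:
from keras.models import Sequential
from keras.layers import Dense

model = Sequential()
model.add(Dense(100, activation='relu', input_shape=(X_train.shape[1],)))  # input + first hidden layer
model.add(Dense(500, activation='relu'))   # hidden layer
model.add(Dense(2, activation='softmax'))  # output layer
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
history = model.fit(X_train, y_train, epochs=200, validation_split=0.1, batch_size=250)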
I'm trying to train a deep classifier in Keras, both with and without pretraining of the hidden layers via stacked autoencoders. My problem is that the pretraining seems to drastically degrade performance (i.e. if pretrain is set to False in the code below, the training error of the final classification layer converges much faster). This seems completely outrageous to me, given that pretraining should only initialize the weights of the hidden layers, and I don't see how that could completely kill the model's performance even if the initialization does not work very well. I cannot include the specific dataset I used, but the effect should occur for any appropriate dataset (e.g. MNIST). What is going on here, and how can I fix it?
EDIT: the code is now reproducible with the MNIST data; the final lines plot the change in the loss function, which is significantly lower with pre-training.
I have also slightly modified the code and added sample learning curves below:
from functools import partial
import matplotlib.pyplot as plt
from keras.datasets import mnist
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import SGD
from keras.regularizers import l2
from keras.utils import to_categorical
(inputs_train, targets_train), _ = mnist.load_data()
inputs_train = inputs_train[:1000].reshape(1000, 784)
targets_train = to_categorical(targets_train[:1000])
hidden_nodes = [256] * 4
learning_rate = 0.01
regularization = 1e-6
epochs = 30
def train_model(pretrain):
    model = Sequential()
    layer = partial(Dense,
                    activation='sigmoid',
                    kernel_initializer='random_normal',
                    kernel_regularizer=l2(regularization))
    for i, hn in enumerate(hidden_nodes):
        kwargs = dict(units=hn, name='hidden_{}'.format(i + 1))
        if i == 0:
            kwargs['input_dim'] = inputs_train.shape[1]
        model.add(layer(**kwargs))
    if pretrain:
        # train autoencoders layer by layer
        inputs_train_ = inputs_train.copy()
        for i, hn in enumerate(hidden_nodes):
            autoencoder = Sequential()
            autoencoder.add(layer(units=hn,
                                  input_dim=inputs_train_.shape[1],
                                  name='hidden'))
            autoencoder.add(layer(units=inputs_train_.shape[1],
                                  name='decode'))
            autoencoder.compile(optimizer=SGD(lr=learning_rate, momentum=0.9),
                                loss='binary_crossentropy')
            autoencoder.fit(inputs_train_,
                            inputs_train_,
                            batch_size=32,
                            epochs=epochs,
                            verbose=0)
            autoencoder.pop()
            model.layers[i].set_weights(autoencoder.layers[0].get_weights())
            inputs_train_ = autoencoder.predict(inputs_train_)
    num_classes = targets_train.shape[1]
    model.add(Dense(units=num_classes,
                    activation='softmax',
                    name='classify'))
    model.compile(optimizer=SGD(lr=learning_rate, momentum=0.9),
                  loss='categorical_crossentropy')
    h = model.fit(inputs_train,
                  targets_train,
                  batch_size=32,
                  epochs=epochs,
                  verbose=0)
    return h.history['loss']
plt.plot(train_model(pretrain=False), label="Without Pre-Training")
plt.plot(train_model(pretrain=True), label="With Pre-Training")
plt.xlabel("Epoch")
plt.ylabel("Cross-Entropy")
plt.legend()
plt.show()