I used the first example here as my network.
How can I stop training when the loss reaches a fixed value?
For example, I would like to set a maximum of 3000 epochs and have training stop as soon as the loss drops below 0.2.
I read this topic, but it is not the solution I am looking for.
I want to stop training when the loss reaches a given value, not when there is no improvement, as with the function proposed in that topic.
Here is the code:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD
# Generate dummy data
import numpy as np
x_train = np.random.random((1000, 20))
y_train = keras.utils.to_categorical(np.random.randint(10, size=(1000, 1)), num_classes=10)
x_test = np.random.random((100, 20))
y_test = keras.utils.to_categorical(np.random.randint(10, size=(100, 1)), num_classes=10)
model = Sequential()
# Dense(64) is a fully-connected layer with 64 hidden units.
# in the first layer, you must specify the expected input data shape:
# here, 20-dimensional vectors.
model.add(Dense(64, activation='relu', input_dim=20))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])
model.fit(x_train, y_train,
          epochs=3000,
          batch_size=128)
score = model.evaluate(x_test, y_test, batch_size=128)
You can use a callback like this if you switch to TensorFlow 2.0:
class haltCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        if(logs.get('loss') <= 0.05):
            print("\n\n\nReached 0.05 loss value so cancelling training!\n\n\n")
            self.model.stop_training = True
You just need to create an instance of that callback and then pass it to model.fit, so it becomes something like this:
trainingStopCallback = haltCallback()

model.fit(x_train, y_train,
          epochs=3000,
          batch_size=128,
          callbacks=[trainingStopCallback])
This way, fitting should stop whenever the loss drops below 0.05 (or whatever value you use when defining the callback).
Also, since this question was asked a long time ago and still had no actual answer for TensorFlow 2.0, I updated your code snippet to TensorFlow 2.0 so everyone can easily find and use it in their new projects.
import tensorflow as tf
# Generate dummy data
import numpy as np
x_train = np.random.random((1000, 20))
y_train = tf.keras.utils.to_categorical(
    np.random.randint(10, size=(1000, 1)), num_classes=10)
x_test = np.random.random((100, 20))
y_test = tf.keras.utils.to_categorical(
    np.random.randint(10, size=(100, 1)), num_classes=10)
model = tf.keras.models.Sequential()
# Dense(64) is a fully-connected layer with 64 hidden units.
# in the first layer, you must specify the expected input data shape:
# here, 20-dimensional vectors.
model.add(tf.keras.layers.Dense(64, activation='relu', input_dim=20))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(10, activation='softmax'))
class haltCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        if(logs.get('loss') <= 0.05):
            print("\n\n\nReached 0.05 loss value so cancelling training!\n\n\n")
            self.model.stop_training = True
trainingStopCallback = haltCallback()
sgd = tf.keras.optimizers.SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])
model.fit(x_train, y_train,
          epochs=3000,
          batch_size=128,
          callbacks=[trainingStopCallback])
score = model.evaluate(x_test, y_test, batch_size=128)
Documentation here: EarlyStopping
keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=0, verbose=0, mode='auto', baseline=None)
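For completeness, here is a minimal usage sketch of EarlyStopping; note that it stops when the monitored quantity stops improving for patience epochs, not when it crosses a fixed threshold, so the custom callback above is closer to what was asked:

from keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='loss', min_delta=0, patience=5, verbose=1, mode='min')

model.fit(x_train, y_train,
          epochs=3000,
          batch_size=128,
          callbacks=[early_stop])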
I solved it by doing this:
history = model.fit(training1, training2, epochs=100, verbose=True)
loss = history.history["loss"]

# Inspect the recorded loss values once training has finished.
for x in loss:
    if x <= 1:
        print("Reached 1 loss value, cancelling training")
        model.stop_training = True
        break
I'm working on a reviews classification model with only two categories: 0 (negative) and 1 (positive). I'm using pre-trained word2vec embeddings from Google with an LSTM. The problem is that I get an accuracy of around 50%, when it should be around 83% according to this paper. I tried many different hyperparameter combinations and still get a poor accuracy. I also tried changing the data preprocessing techniques and using stemming, but that hasn't resolved the problem.
Here's my code:
X, y = read_data()
X = np.array(clean_text(X)) #apply data preprocessing
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X)
#converts text to sequence and add padding zeros
sequence = tokenizer.texts_to_sequences(X)
X_data = pad_sequences(sequence, maxlen = length, padding = 'post')
X_train, X_val, y_train, y_val = train_test_split(X_data, y, test_size = 0.2)
#Load the word2vec model
word2vec = KeyedVectors.load_word2vec_format(EMBEDDING_FILE, binary=True)
word_index = tokenizer.word_index
nb_words = min(MAX_NB_WORDS, len(word_index))+1
embedding_matrix = np.zeros((nb_words, EMBEDDING_DIM))
null_words = []
for word, i in word_index.items():
    if word in word2vec.wv.vocab:
        embedding_matrix[i] = word2vec.word_vec(word)
    else:
        null_words.append(word)
embedding_layer = Embedding(embedding_matrix.shape[0], # or len(word_index) + 1
                            embedding_matrix.shape[1], # or EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=701,
                            trainable=False)
model = Sequential()
model.add(embedding_layer)
model.add(LSTM(100))
model.add(Dropout(0.4))
model.add(Dense(2, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=32, epochs=2, validation_data=(X_val, y_val), workers = -1, verbose=1)
score, acc = model.evaluate(X_val, y_val, batch_size=64)
I also tried other optimizers like Adamax and the MSLE loss function, and no matter how much I increase the number of epochs or change the batch size, the accuracy never gets better. I'm just so confused: if the problem isn't with the model or the preprocessing, where could it be? Thanks.
A few things I noted:
Why do you have trainable=False? It restricts your model so that it cannot fine-tune the embeddings. Learning a problem with a fixed set of embeddings is harder than with trainable embeddings, so try setting trainable=True (see the sketch after the snippet below).
embedding_layer = Embedding(embedding_matrix.shape[0], # or len(word_index) + 1
                            embedding_matrix.shape[1], # or EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=701,
                            trainable=False)
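A minimal sketch of the same layer with the embeddings left trainable (all other arguments unchanged):

embedding_layer = Embedding(embedding_matrix.shape[0],
                            embedding_matrix.shape[1],
                            weights=[embedding_matrix],
                            input_length=701,
                            trainable=True)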
The second problem is that you are using 2 units with sigmoid activation and binary_crossentropy. This combination doesn't work. You have two options.
model = Sequential()
...
model.add(Dense(2, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
Option 1
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
If you pick this option, note that your labels need to have shape [sample_size, 1].
Option 2
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
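If you pick this option, your labels need to be one-hot encoded instead. A minimal sketch of preparing the labels for either option, assuming y is a 1-D array of 0/1 integer labels (how y is built inside read_data is not shown in the question):

import numpy as np
from keras.utils import to_categorical

y = np.asarray(y)
y_option1 = y.reshape(-1, 1)      # shape [sample_size, 1] for Dense(1, activation='sigmoid')
y_option2 = to_categorical(y, 2)  # shape [sample_size, 2] for Dense(2, activation='softmax')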
As an example network, I used the first example here.
I would like to use TensorBoard with this network. After reading this documentation about how to use TensorBoard, I added these commands to the code:
from keras.callbacks import TensorBoard
TensorBoard("Directory path that contains the log files")
The output looks correct:
Out[3]: <keras.callbacks.TensorBoard at 0x7f14730e79b0>
But there is nothing in the directory...
What did I do wrong?
Here is the complete code:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD
from keras.callbacks import TensorBoard
# Generate dummy data
import numpy as np
x_train = np.random.random((1000, 20))
y_train = keras.utils.to_categorical(np.random.randint(10, size=(1000, 1)), num_classes=10)
x_test = np.random.random((100, 20))
y_test = keras.utils.to_categorical(np.random.randint(10, size=(100, 1)), num_classes=10)
model = Sequential()
# Dense(64) is a fully-connected layer with 64 hidden units.
# in the first layer, you must specify the expected input data shape:
# here, 20-dimensional vectors.
model.add(Dense(64, activation='relu', input_dim=20))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])
model.fit(x_train, y_train,
          epochs=20,
          batch_size=128)
score = model.evaluate(x_test, y_test, batch_size=128)
TensorBoard("Directory path that contains the log files")
You need to pass the callback to model.fit:
tb = TensorBoard('log_dir')

model.fit(x_train, y_train,
          epochs=20,
          batch_size=128,
          callbacks=[tb])
I followed a tutorial on YouTube, and I accidentally didn't add model.add(Dense(6, activation='relu')) in Keras and got 36% accuracy. After I added this line, the accuracy rose to 86%. Why did this happen?
This is the code:
from sklearn.model_selection import train_test_split
import keras
from keras.models import Sequential
from keras.layers import Dense
import numpy as np
np.random.seed(3)
classifications = 3
dataset = np.loadtxt('wine.csv', delimiter=",")
X = dataset[:,1:14]
Y = dataset[:,0:1]
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.66,
                                                    random_state=5)
y_train = keras.utils.to_categorical(y_train-1, classifications)
y_test = keras.utils.to_categorical(y_test-1, classifications)
model = Sequential()
model.add(Dense(10, input_dim=13, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(6, activation='relu')) # This is the code I missed
model.add(Dense(6, activation='relu'))
model.add(Dense(4, activation='relu'))
model.add(Dense(2, activation='relu'))
model.add(Dense(classifications, activation='softmax'))
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=
['accuracy'])
model.fit(x_train, y_train, batch_size=15, epochs=2500, validation_data=
(x_test, y_test))
The number of layers is a hyperparameter, just like the learning rate and the number of neurons.
These play an important role in determining the accuracy.
So in your case, the layer
model.add(Dense(6, activation='relu'))
played the key role.
We cannot say exactly what these layers are doing internally.
The best we can do is hyperparameter tuning to find the best combination of hyperparameters; a minimal sketch of such a search is shown below.
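This is only a rough illustration, reusing x_train, y_train, x_test, y_test and classifications from the question; the candidate layer sizes are made up for the example, and the epoch count is kept far below the original 2500 to keep the search quick:

from keras.models import Sequential
from keras.layers import Dense

# Try a few depths and keep the configuration with the best test accuracy.
candidate_layer_sizes = [[10, 8], [10, 8, 6], [10, 8, 6, 6, 4, 2]]
best_acc, best_sizes = 0.0, None
for sizes in candidate_layer_sizes:
    model = Sequential()
    model.add(Dense(sizes[0], input_dim=13, activation='relu'))
    for units in sizes[1:]:
        model.add(Dense(units, activation='relu'))
    model.add(Dense(classifications, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(x_train, y_train, batch_size=15, epochs=100, verbose=0)
    _, acc = model.evaluate(x_test, y_test, verbose=0)
    if acc > best_acc:
        best_acc, best_sizes = acc, sizes
print(best_sizes, best_acc)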
In my opinion, it may be the ratio of your training set to your test set. You put 66% of the data in the test set, so it's possible that this model is underfitting, and then one less Dense layer causes a bigger change in accuracy. Set test_size=0.2 and check again how much the missing layer changes the accuracy.
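For example, a minimal change to the split from the question:

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=5)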
I am currently working on handwritten digit recognition for regional languages, currently focusing on Oriya. I tested a CNN model on the MNIST dataset and I am trying to apply the model to my Oriya dataset. The model is performing poorly and gives wrong predictions. I have a dataset of 4971 samples.
How can I improve the accuracy?
Here's my code:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import SGD, RMSprop, adam
from keras.utils import np_utils
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import os
import theano
from PIL import Image
from numpy import *
# SKLEARN
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
# fix random seed for reproducibility
seed = 7
np.random.seed(seed)
# input image dimensions
img_rows, img_cols = 28, 28
# number of channels
img_channels = 1
path2 = '/home/saumya/Desktop/Oriya/p' #path of folder of images
imlist = os.listdir(path2)
im1 = array(Image.open('/home/saumya/Desktop/Oriya/p' + '/'+ imlist[0])) # open one image to get size
m,n = im1.shape[0:2] # get the size of the images
imnbr = len(imlist) # get the number of images
# create matrix to store all flattened images
immatrix = array([array(Image.open('/home/saumya/Desktop/Oriya/p' + '/'+ im2)).flatten()
for im2 in imlist],'f')
label = np.ones((imnbr,), dtype=int)  # imnbr is the total number of images
label[1:503]=0
label[503:1000]=1
label[1000:1497]=2
label[1497:1995]=3
label[1995:2493]=4
label[2493:2983]=5
label[2983:3483]=6
label[3483:3981]=7
label[3981:4479]=8
label[4479:4972]=9
print(label[1000])
data,Label = shuffle(immatrix,label, random_state=2)
train_data = [data,Label]
img=immatrix[2496].reshape(img_rows,img_cols)
plt.imshow(img)
plt.show()
(X, y) = (train_data[0],train_data[1])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)
X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
X_train = X_train.reshape(X_train.shape[0], 1, 28, 28).astype('float32')
X_test = X_test.reshape(X_test.shape[0], 1, 28, 28).astype('float32')
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
num_classes = y_test.shape[1]
def baseline_model():
    # create model
    model = Sequential()
    model.add(Conv2D(32, (3, 3), input_shape=(1, 28, 28), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))
    #model.add(Conv2D(64, (5, 5), input_shape=(1, 10, 10), activation='relu'))
    #model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(num_classes, activation='softmax', name='first_dense_layer'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
# build the model
model = baseline_model()
# Fit the model
hist=model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=30, batch_size=100, verbose=2)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("CNN Error: %.2f%%" % (100-scores[1]*100))
score = model.evaluate(X_test, y_test, verbose=0)
print('Test Loss:', score[0])
print('Test accuracy:', score[1])
test_image = X_test[0:1]
print (test_image.shape)
print(model.predict(test_image))
print(model.predict_classes(test_image))
print(y_test[0:1])
# define the larger model
def larger_model():
    # create model
    model = Sequential()
    model.add(Conv2D(30, (5, 5), input_shape=(1, 28, 28), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(15, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(50, activation='relu', name='first_dense_layer'))
    model.add(Dense(num_classes, activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
# build the model
model = larger_model()
# Fit the model
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=200)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Large CNN Error: %.2f%%" % (100-scores[1]*100))
I am also trying to resize my images using OpenCV, and it generates the following error:
/root/mc-x64-2.7/conda-bld/opencv-3_1482254119970/work/opencv-3.1.0/modules/imgproc/src/imgwarp.cpp:3229: error: (-215) ssize.area() > 0 in function resize
How can I improve the accuracy?
It is a bit hard to give a detailed answer from what you posted, and without seeing a data sample, but I will still take a stab at it. What I can see that may help improve your accuracy is:
Get more data. In deep learning one usually works with large amounts of data, and models almost always improve when you add more. If you can't obtain new data, you may try to generate more samples from the ones you have, by adding noise or similar modifications (see the sketch after this list).
I see you currently train your models for 30 and 10 epochs. I suggest you increase the number of epochs so your model has more time to converge; this also usually improves performance up to a point.
I also see that your batch sizes are 100 and 200. You can try reducing the batch size, so your training performs gradient updates more times per epoch (remember that you can even use batch_size=1 to update your model after each sample, instead of batches).
Alternatively, you can try iteratively increasing the complexity and number of layers of your architecture and compare performances. It is best to start with a simple model, train and test, and then add layers and nodes until you are satisfied with the results. I also see you have tried a hybrid convolutional and non-convolutional approach; you could start with just one of the approaches before increasing the complexity of your architecture.
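A minimal sketch of the noise-based augmentation idea mentioned above, assuming X_train and y_train are the scaled arrays from the question:

import numpy as np

# Create noisy copies of the training images and stack them onto the original set.
noise = np.random.normal(loc=0.0, scale=0.05, size=X_train.shape)
X_train_aug = np.concatenate([X_train, np.clip(X_train + noise, 0.0, 1.0)], axis=0)
y_train_aug = np.concatenate([y_train, y_train], axis=0)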
I am trying to run an LSTM using Keras on my custom feature set. I have train and test features in separate files. Each CSV file contains 11 columns, with the last column as the class label. There are 40 classes in total in my dataset. The problem is that I am not able to figure out the correct input_shape for the first layer. I have searched Stack Overflow and GitHub but am still not able to solve this.
Below is my complete code.
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
numpy.random.seed(7)
train_dataset = numpy.loadtxt("train.csv", delimiter=",")
X_train = train_dataset[:, 0:10]
y_train = train_dataset[:, 10]
test_dataset = numpy.loadtxt("test.csv", delimiter=",")
X_test = test_dataset[:, 0:10]
y_test = test_dataset[:, 10]
model = Sequential()
model.add(LSTM(32, return_sequences=True, input_shape=X_train.shape))
model.add(LSTM(32, return_sequences=True))
model.add(LSTM(32))
model.add(Dense(1, activation='softmax'))
model.compile(loss='mean_squared_error', optimizer='sgd', metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=10, epochs=1)
score, acc = model.evaluate(X_test, y_test, batch_size=10)
print('Test score:', score)
print('Test accuracy:', acc * 100)
Whatever I change in the input_shape parameter, I get an error either in the first LSTM layer or in the fit method.
You don't have a time dimension in your input.
Input for an RNN should be (batch_size, time_steps, features), while your input has dimension (batch_size, features).
If you want to use your 10 columns one at a time, you should reshape the array with
numpy.reshape(train_dataset, (-1, train_dataset.shape[1], 1))
Try this code:
train_dataset = numpy.loadtxt("train.csv", delimiter=",")
train_dataset = numpy.reshape(train_dataset, (-1, train_dataset.shape[1], 1))
X_train = train_dataset[:, 0:10]
y_train = train_dataset[:, 10]
test_dataset = numpy.loadtxt("test.csv", delimiter=",")
test_dataset = numpy.reshape(test_dataset, (-1, test_dataset.shape[1], 1))
X_test = test_dataset[:, 0:10]
y_test = test_dataset[:, 10]
model = Sequential()
model.add(LSTM(32, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(32, return_sequences=True))
model.add(LSTM(32))
model.add(Dense(1, activation='softmax'))
model.compile(loss='mean_squared_error', optimizer='sgd', metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=10, epochs=1)
score, acc = model.evaluate(X_test, y_test, batch_size=10)
print('Test score:', score)
print('Test accuracy:', acc * 100)