I'm attempting to use look ahead with RAdam as the inner optimizer. I've tried Look Ahead with Adam as the inner optimizer and seperately I've used RAdam as the optimizer with good results. However, when I try to use Look Ahead with RAdam accuracy actually gets worse during training.
I've verified that training goes well when these optimizers are run separately. I've also tried with and without warmup using RAdam.
from keras_lookahead import Lookahead
from keras_radam import RAdam
batch_size = 128
num_classes = 10
epochs = 20
Ranger=Lookahead(RAdam(total_steps=10000, warmup_proportion=0.1, min_lr=1e-5))
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
model = Sequential()
model.add(Dense(512, activation='relu', input_shape=(784,)))
model.add(Dropout(0.2))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_classes, activation='softmax'))
model.summary()
model.compile(loss='categorical_crossentropy',
optimizer=Ranger,
metrics=['accuracy'])
history = model.fit(x_train, y_train,
batch_size=batch_size,
epochs=epochs,
verbose=1,
validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
The model trains but accuracy starts poor (.0851) and gets worse (.0836). Any ideas why
Related
I use python to multi-class text classification , my data set contains 25000 Arabic tweets divided into 10 classes[sport, politics,....]
When I use
training = pd.read_csv('E:\cluster data\One_File_nonnormalizenew2norm.txt', sep="*")
training.dropna(inplace=True)
training.columns = ["text", "class1"]
training['class1'] = training.class1.astype('category').cat.codes
training.dropna(inplace=True)
# create our training data from the tweets
text = training['text']
y = (training['class1'])
from sklearn.model_selection import train_test_split
sentences_train, sentences_test, y_train, y_test = train_test_split(text, y, test_size=0.25, random_state=1000)
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer()
vectorizer.fit(sentences_train)
X_train = vectorizer.transform(sentences_train)
X_test = vectorizer.transform(sentences_test)
X_train
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression()
classifier.fit(X_train, y_train)
score = classifier.score(X_test, y_test)
print("Accuracy:", score)
Accuracy: 0.9525099601593625
When I use keras:
model = Sequential()
max_words=5000
model.add(Dense(512, input_shape=(input_dim,), activation='softmax'))
model.add(Dropout(0.5))
model.add(Dense(256, activation='softmax'))
model.add(Dropout(0.5))
model.add(Dense(1,activation='softmax'))
model.add(Dense(10))
model.summary()
model.compile(loss='sparse_categorical_crossentropy',
optimizer='adam',
metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=150, epochs=5, verbose=1, validation_split=0.3,shuffle=True)
predicted = model.predict(X_test)
predicted = np.argmax(predicted, axis=1)
accuracy_score(y_test, predicted)
0.28127490039840636
where the mistake???
update
I change the code to:
model = Sequential()
max_words=5000
model.add(Dense(512, input_shape=(input_dim,)))
model.add(Dropout(0.5))
model.add(Dense(256))
model.add(Dropout(0.5))
#model.add(Dense(1,activation='sigmoid'))####
model.add(Dense(10))
model.summary()
model.compile(loss='sparse_categorical_crossentropy',
optimizer='adam',
metrics=['accuracy'])
model.fit(X_train, y_train,batch_size=150,epochs=10,verbose=1,validation_split=0.3,shuffle=True)
predicted = model.predict(X_test)
predicted = np.argmax(predicted, axis=1)
accuracy_score(y_test, predicted)
0.7201593625498008
still bad accuracy!!!
Some ideas.
Remove all softmax activations (as #Matias said).
Remove the model.add(Dense(1,activation='softmax')), it's probably destroying your results.
Do more than 5 epochs.
You are not using the same tweets for validation in the two approaches.
You should probably give the accuracy on both the training and the testing datasets to be sure what is going on.
I use the following code (courtesy to here) which runs CNN for training MNIST images:
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
batch_size = 128
num_classes = 10
epochs = 1
# input image dimensions
img_rows, img_cols = 28, 28
# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()
if K.image_data_format() == 'channels_first':
x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
input_shape = (1, img_rows, img_cols)
else:
x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
input_shape = (img_rows, img_cols, 1)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
activation='relu',
input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss=keras.losses.categorical_crossentropy,
optimizer=keras.optimizers.Adadelta(),
metrics=['accuracy'])
model.fit(x_train, y_train,
batch_size=batch_size,
epochs=epochs,
verbose=1,
validation_data=(x_test, y_test))
print(model.save_weights('file.txt')) # <<<<<----
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
My goal is to use CNN model to extract MNIST features into a dataset that I can use as an input for another classifier. In this example, I don't care about the classification operation since all I need is the features of the trained images. The only method I found is save_weights as:
print(model.save_weights('file.txt'))
How can I extract features into a dataset from keras model?
After training or loading the existing trained model, you can create another model:
extract = Model(model.inputs, model.layers[-3].output) # Dense(128,...)
features = extract.predict(data)
and use the .predict method to return the vectors from a specific layer, in this case every image will become (128,), the output of the Dense(128, ...) layer.
You can also train these networks jointly with 2 outputs using the functional API. Follow the guide and you'll see that you can chain models together and have multiple outputs each possibly with a separate loss. This will allow your model to learn shared features that is useful for both classifying the MNIST image and your task at the same time.
I use a recurrent network (in special GRU) for predict a time serie with a lenght of 90 occurrences. The type of data is multivariante, and a follow this example.
Multivariante Time Series
Option 1:
I use keras for develop the rnn
n_train_quarter = int(len(serie) * 0.75)
train = values[:n_train_quarter, :]
test = values[n_train_quarter:, :]
X_train, y_train = train[:, :-1], train[:, - 1]
X_test, y_test = test[:, :-1], test[:, - 1]
# All parameter can be changes kernel, activation, optimizer, ...
model = Sequential()
model.add(GRU(64, input_shape=(X_train.shape[1], X_train.shape[2]),return_sequences=True))
model.add(Dropout(0.5))
# n is random
for i in range(n)
model.add(GRU(64,kernel_initializer = 'uniform', return_sequences=True))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(1))
model.add(Activation('softmax'))
#Compile and fit
model.compile(loss='mean_squared_error', optimizer='SGD')
early_stopping = EarlyStopping(monitor='val_loss', patience=50)
checkpointer = ModelCheckpoint(filepath=Checkpoint_mode, verbose=0, save_weights_only=False, save_best_only=True)
model.fit(X_train, y_train,
batch_size=256,
epochs=64,
validation_split=0.25,
callbacks=[early_stopping, checkpointer],
verbose=0,
shuffle=False)
And the result with less error look like the image (there are various experiment with same result)
Option 2:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.33, random_state = 42)
# All parameter can be changes kernel, activation, optimizer, ...
model = Sequential()
model.add(GRU(64, input_shape=(X_train.shape[1], X_train.shape[2]),return_sequences=True))
model.add(Dropout(0.5))
# n is random
for i in range(n)
model.add(GRU(64,kernel_initializer = 'uniform', return_sequences=True))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(1))
model.add(Activation('softmax'))
#Compile and fit
model.compile(loss='mean_squared_error', optimizer='SGD')
early_stopping = EarlyStopping(monitor='val_loss', patience=50)
checkpointer = ModelCheckpoint(filepath=Checkpoint_mode, verbose=0, save_weights_only=False, save_best_only=True)
model.fit(X_train, y_train,
batch_size=256,
epochs=64,
validation_split=0.25,
callbacks=[early_stopping, checkpointer],
verbose=0,
shuffle=False)
And the result with less error print as
Can use "train_test_split" of sklearn with random select for the data?
Why is better the result with secuential data than with random selection data, if GRU is better with secuential data?
I am trying out the mnist tutorial using keras at https://github.com/fchollet/keras/blob/master/examples/mnist_cnn.py but am getting really low accuracy. I got a test accuracy of 83% on my 1st trial after 12 epochs and 75% on my 2nd trial. The page claims that it is able to get 99.25% test accuracy after 12 epochs. I am not sure why this is happening because I literally copy pasted the code into PyCharm.
I am using Keras version 2.0.0 with Tensorflow version 0.12.1 as its backend. Training is done via CPU.
Could the reason be due to the version? Should I upgrade to the latest versions of Keras and Tensorflow? I hope someone can run the exact code and let me know of their results after 12 epochs with the Keras and Tensorflow version.
Or could the reason be due to the variability in deep learning and that I might eventually get a ~99% test accuracy after 12 epochs if I run enough simulations. This is the code in case anyone doesn't wanna get into the link
'''Trains a simple convnet on the MNIST dataset.
Gets to 99.25% test accuracy after 12 epochs
(there is still a lot of margin for parameter tuning).
16 seconds per epoch on a GRID K520 GPU.
'''
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
batch_size = 128
num_classes = 10
epochs = 12
# input image dimensions
img_rows, img_cols = 28, 28
# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()
if K.image_data_format() == 'channels_first':
x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
input_shape = (1, img_rows, img_cols)
else:
x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
input_shape = (img_rows, img_cols, 1)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
activation='relu',
input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss=keras.losses.categorical_crossentropy,
optimizer=keras.optimizers.Adadelta(),
metrics=['accuracy'])
model.fit(x_train, y_train,
batch_size=batch_size,
epochs=epochs,
verbose=1,
validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
I am trying to run a LSTM using Keras on my custom features set. I have train and test features in separate files. Each csv file contains 11 columns with last column as class label. There are total 40 classes in my dataset. The problem is I am not able to figure out the correct input_shape to the first layer. I had explored all the stackoverflow and github but still not able to solve this
Below is my complete code.
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
numpy.random.seed(7)
train_dataset = numpy.loadtxt("train.csv", delimiter=",")
X_train = train_dataset[:, 0:10]
y_train = train_dataset[:, 10]
test_dataset = numpy.loadtxt("test.csv", delimiter=",")
X_test = test_dataset[:, 0:10]
y_test = test_dataset[:, 10]
model = Sequential()
model.add(LSTM(32, return_sequences=True, input_shape=X_train.shape))
model.add(LSTM(32, return_sequences=True))
model.add(LSTM(32))
model.add(Dense(1, activation='softmax'))
model.compile(loss='mean_squared_error', optimizer='sgd', metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=10, epochs=1)
score, acc = model.evaluate(X_test, y_test, batch_size=10)
print('Test score:', score)
print('Test accuracy:', acc * 100)
Whatever I change in input_shape parameter wither I get error in first LSTM layer of in fit method.
you don't have a time dimension in your input.
Input for RNN should be (batch_size, time_step, features) while your input has dimension (batch_size, features).
If you want to use your 10 columns one at a time you should reshape the array with
numpy.reshape(train_dataset, (-1, train_dataset.shape[1], 1))
Try this code:
train_dataset = numpy.loadtxt("train.csv", delimiter=",")
train_dataset = numpy.reshape(train_dataset, (-1, train_dataset.shape[1], 1))
X_train = train_dataset[:, 0:10]
y_train = train_dataset[:, 10]
test_dataset = numpy.loadtxt("test.csv", delimiter=",")
test_dataset = numpy.reshape(test_dataset, (-1, train_dataset.shape[1], 1))
X_test = test_dataset[:, 0:10]
y_test = test_dataset[:, 10]
model = Sequential()
model.add(LSTM(32, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(32, return_sequences=True))
model.add(LSTM(32))
model.add(Dense(1, activation='softmax'))
model.compile(loss='mean_squared_error', optimizer='sgd', metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=10, epochs=1)
score, acc = model.evaluate(X_test, y_test, batch_size=10)
print('Test score:', score)
print('Test accuracy:', acc * 100)