I am new to machine learning and the Keras library, and I wrote the CNN code below for regression.
%matplotlib inline
from __future__ import division
import numpy as np
from numpy.random import rand
import matplotlib.pyplot as plt

def initial_spin_state(N):
    state = np.random.choice((0.11111, 0.99999), (N, N))
    return state

def metropolis_algorithm(config, beta):
    N = len(config)
    for i in range(N):
        for j in range(N):
            a = np.random.randint(0, N)
            b = np.random.randint(0, N)
            s = config[a, b]
            near = config[(a+1)%N, b] + config[a, (b+1)%N] + config[(a-1)%N, b] + config[a, (b-1)%N]
            delta = 2 * s * near
            if delta < 0:
                s *= -1
            elif rand() < np.exp(-delta * beta):
                s *= -1
            config[a, b] = s
    return config

def get_energy(config):
    energy = 0
    N = len(config)
    for i in range(N):
        for j in range(N):
            S = config[i, j]
            near = config[(i+1)%N, j] + config[i, (j+1)%N] + config[(i-1)%N, j] + config[i, (j-1)%N]
            energy += near * S
    return energy
x_train = []
y_train = []
for i in range(50000):
    config = initial_spin_state(16)
    energy = get_energy(config)
    x_train.append(config)
    y_train.append(energy)
x_train = np.array(x_train)
y_train = np.array(y_train)
print(x_train.shape)
print(y_train.shape)

x_test = []
y_test = []
for j in range(20000):
    config = initial_spin_state(16)
    energy = get_energy(config)
    x_test.append(config)
    y_test.append(energy)
x_test = np.array(x_test)
y_test = np.array(y_test)
print(x_test.shape)
print(y_test.shape)

x_train = x_train.reshape(50000, 16, 16, 1)
x_test = x_test.reshape(20000, 16, 16, 1)
print(x_train.shape)
print(x_test.shape)
import numpy as np
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.optimizers import Adadelta
from keras.regularizers import l2
model = Sequential()
model.add(Conv2D(32, (2, 2), input_shape = (16, 16, 1), activation = 'relu'))
model.add(MaxPooling2D(2,2))
model.add(Conv2D(16, (2, 2), activation = 'relu'))
model.add(MaxPooling2D(2,2))
model.add(Flatten())
model.add(Dense(512, activation = 'relu'))
#model.add(Dense(1024, activation = 'relu'))
model.add(Dense(1))
np.random.seed(0)
model.summary()
model.compile(loss = "mse", metrics = ['accuracy'], optimizer = 'adam')
%%time
hist = model.fit(x_train, y_train, epochs = 200, batch_size = 500,
                 validation_data = (x_test, y_test), verbose = 2)
import matplotlib.pyplot as plt
plt.plot(hist.history['acc'], '-b', label = "training")
plt.plot(hist.history['val_acc'], 'r:', label = "test")
plt.legend()
plt.grid("on")
plt.show()
This code takes an image as input and produces a continuous energy value as output, so if I feed an image (an Ising configuration) to the CNN, it should predict the energy of that configuration.
The problem is that when I train the CNN, both the training loss and the validation loss decrease very slowly, and of course the training and validation accuracy increase very slowly as well.
Sometimes only the training accuracy increases, while the validation accuracy does not.
What's wrong with my code?
First of all, since you are doing a regression problem, I don't think it's a good idea to use acc as your metric; instead, consider using mean absolute error (mae) as your metric.
The loss you are using is mse (mean squared error), so the value will be pretty large, especially since you are not normalising your y values. However, after running ~25 epochs of your provided code, the validation loss dropped to 290.xx with an mae of 13.xx (and it had not yet converged). I also tried using your model to predict some validation data, and it works fine. Maybe you should test your model before assuming something went wrong.
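For example, here is a minimal sketch (my own tweak, not from the original post) that standardises the energies so the mse stays in a sensible range and tracks mae instead of accuracy:

# Standardise the targets before training; all names below reuse the
# asker's arrays. The scaling constants come from the training set only.
y_mean, y_std = y_train.mean(), y_train.std()
model.compile(loss='mse', metrics=['mae'], optimizer='adam')
hist = model.fit(x_train, (y_train - y_mean) / y_std,
                 epochs=25, batch_size=500,
                 validation_data=(x_test, (y_test - y_mean) / y_std),
                 verbose=2)
# Undo the scaling when reading predictions back out:
pred_energy = model.predict(x_test) * y_std + y_mean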
Related
I want to use predictions in real time, but first I want to verify that the results are the same as when all the data is given as input at once. This is the code:
import numpy as np
from keras.layers import Input, Dense, SimpleRNN
from keras.models import Model

# MODEL 1, for training
x = Input(shape=(None, 1))
h = SimpleRNN(30, return_sequences=True)(x)
out = Dense(1)(h)
model = Model(x, out)
model.compile(loss='mse', optimizer='adam')

X_train = np.random.rand(100, 50, 1)
y_train = np.random.rand(100, 50, 1)
model.fit(X_train, y_train, verbose=False)

# MODEL 2, a stateful copy of MODEL 1 for predictions in real time
x = Input(batch_shape=(1, None, 1))
h = SimpleRNN(30, stateful=True, return_sequences=True)(x)
out = Dense(1)(h)
predict_model = Model(x, out)
predict_model.set_weights(model.get_weights())

X = np.random.rand(2, 2, 1)
predictions = model.predict(X, verbose=False)
for sim in range(len(predictions)):
    for i in range(len(predictions[0])):
        pred = predict_model.predict(X[sim:(sim+1), i:(i+1), :], verbose=False)
        print(pred[0][0])            # prediction in real time
        print(predictions[sim][i])   # prediction with MODEL 1
        print()
    predict_model.reset_states()
It prints this:
[0.09156141]
[0.09156139]
[-0.38076958]
[-0.38076955]
[0.12214336]
[0.12214339]
[-0.52013564]
[-0.5201356]
The results should be exactly identical, since both models have the same weights. What is happening?
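One way to check whether the gap is just single-precision rounding (a sketch of my own, not part of the original post) is to collect the step-by-step outputs and compare them to the batched ones with a float32-sized tolerance:

# Rerun the real-time loop, storing the outputs instead of printing them.
# If np.allclose passes, both models agree up to ordinary float32 noise.
stream = np.empty_like(predictions)
for sim in range(len(predictions)):
    for i in range(len(predictions[0])):
        out = predict_model.predict(X[sim:(sim+1), i:(i+1), :], verbose=False)
        stream[sim, i] = out[0, 0]
    predict_model.reset_states()
print(np.allclose(stream, predictions, atol=1e-5))  # True for the output above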
I'm trying to learn autoencoders by implementing one, but model.fit crashes my VS Code when batch_size or epochs is set to a high number. I can also see my memory reach 100%, and VS Code becomes unresponsive.
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn import metrics

# define input and output: the first n+1 Fibonacci numbers
n = 19
fib = [1, 1]
for i in np.arange(2, n+1):
    fib.append(fib[i-1] + fib[i-2])
x = np.array([fib])
y = np.array([fib])
print(x)
print(x.shape)
print(y.shape)
plt.scatter(x, y)

# building the neural network
model = Sequential()
model.add(Dense(2, input_dim=x.shape[1], activation='relu'))
model.add(Dense(x.shape[1]))
model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()

# model fit
model.fit(y, y, verbose=1, batch_size=4, epochs=1000)
pred = model.predict(y)
I am trying to build an LSTM model for cryptocurrency prediction, just for fun.
I managed to build and compile my LSTM model; however, I couldn't manage to predict future dates.
I have checked these solutions so far:
How to use the LSTM model for multi-step forecasting?
Forecast future values with LSTM in Python
How to predict actual future values after testing the trained LSTM model?
I couldn't implement these solutions in my code.
A summary of my dataset looks like this (simple bitcoin prices):
open,close,high,low,volume,time,date
4331.6,4354.43,4394.47,4303.29,3841.525758,1543438799,2018-11-28 23:59:59
4356.23,4243.57,4359.13,4218.79,4434.861032,1543442399,2018-11-29 00:59:59
4243.57,4236.09,4266.0,4185.01,4347.171442,1543445999,2018-11-29 01:59:59
4236.4,4264.85,4279.9,4215.8,2999.814805,1543449599,2018-11-29 02:59:59
First, I prepare and scale my data:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.optimizers import adam_v2
from keras.layers import Dense, LSTM, LeakyReLU, Dropout
data = pd.read_csv('bitcoin.csv')
price = data.filter(['close'])
min_max_scaler = MinMaxScaler()
norm_data = min_max_scaler.fit_transform(price.values)
Then I split my train and test data from the original data.
def univariate_data(dataset, start_index, end_index, history_size, target_size):
    data = []
    labels = []
    start_index = start_index + history_size
    if end_index is None:
        end_index = len(dataset) - target_size
    for i in range(start_index, end_index):
        indices = range(i - history_size, i)
        data.append(np.reshape(dataset[indices], (history_size, 1)))
        labels.append(dataset[i + target_size])
    return np.array(data), np.array(labels)
past_history = 5
future_target = 0
TRAIN_SPLIT = int(len(norm_data) * 0.75)
x_train, y_train = univariate_data(norm_data, 0, TRAIN_SPLIT, past_history, future_target)
x_test, y_test = univariate_data(norm_data, TRAIN_SPLIT, None, past_history, future_target)
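A quick shape check (illustrative, using the names defined above): each sample is a window of the previous past_history scaled closes, and its label is the close immediately after the window.

print(x_train.shape)  # (TRAIN_SPLIT - past_history, past_history, 1)
print(y_train.shape)  # (TRAIN_SPLIT - past_history, 1)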
Finally, I compile my model and predict.
num_units = 64
learning_rate = 0.0001
activation_function = 'sigmoid'
adam = adam_v2.Adam(learning_rate=learning_rate)
loss_function = 'mse'
batch_size = 5
num_epochs = 64
model = Sequential()
model.add(LSTM(units = num_units, activation=activation_function, input_shape=(None, 1)))
model.add(LeakyReLU(alpha=0.5))
model.add(Dropout(0.1))
model.add(Dense(units = 1))
model.compile(optimizer=adam, loss=loss_function)
history = model.fit(
    x_train,
    y_train,
    validation_split=0.1,
    batch_size=batch_size,
    epochs=num_epochs,
    shuffle=False
)
model.save('bitcoin.h5')
test_predict = model.predict(x_test)
train_predict = model.predict(x_train)
The result is satisfying to me. But instead of predicting on the training data, I want to use this model to predict the future (for example, the next 100 rows).
I am still learning numpy, pandas, and all the other libraries used in this example.
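One common approach (a sketch of my own, not from the original question) is recursive forecasting: predict one step ahead, append the prediction to the input window, and repeat.

# Roll the model forward 100 steps; errors compound, so long horizons
# should be read with caution. Uses model, norm_data, past_history and
# min_max_scaler from the code above.
window = norm_data[-past_history:].reshape(1, past_history, 1)
future = []
for _ in range(100):
    next_scaled = model.predict(window)[0, 0]
    future.append(next_scaled)
    # drop the oldest value and append the new prediction
    window = np.append(window[:, 1:, :], [[[next_scaled]]], axis=1)
future_prices = min_max_scaler.inverse_transform(np.array(future).reshape(-1, 1))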
I created a ConvLSTM network that classifies videos. The dataset consists of 6 classes, each containing videos.
I get the error "NameError: name 'y_pred' is not defined", related to the third-to-last line of code. You don't need to run the code; it's probably a syntax error or something like that.
Why do I get this error?
import keras
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import *
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
import keras_metrics as km
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import multilabel_confusion_matrix
data_dir = "video_data/"
img_height , img_width = 64, 64
seq_len = 70
classes = ["Apply Eye Makeup", "Archery", "Apply Lipstick", "Baby Crawling", "Balance Beam", "Band Marching"]
# Creating frames from videos
def frames_extraction(video_path):
    frames_list = []
    vidObj = cv2.VideoCapture(video_path)
    # Used as counter variable
    count = 1
    while count <= seq_len:
        success, image = vidObj.read()
        if success:
            image = cv2.resize(image, (img_height, img_width))
            frames_list.append(image)
            count += 1
        else:
            print("Defected frame")
            break
    return frames_list
def create_data(input_dir):
    X = []
    Y = []
    classes_list = os.listdir(input_dir)
    for c in classes_list:
        print(c)
        files_list = os.listdir(os.path.join(input_dir, c))
        for f in files_list:
            frames = frames_extraction(os.path.join(os.path.join(input_dir, c), f))
            if len(frames) == seq_len:
                X.append(frames)
                y = [0] * len(classes)
                y[classes.index(c)] = 1
                Y.append(y)
    X = np.asarray(X)
    Y = np.asarray(Y)
    return X, Y
X, Y = create_data(data_dir)
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.20, shuffle=True, random_state=0)
model = Sequential()
model.add(ConvLSTM2D(filters = 64, kernel_size = (3, 3), return_sequences = False, data_format = "channels_last", input_shape = (seq_len, img_height, img_width, 3)))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(256, activation="relu"))
model.add(Dropout(0.3))
model.add(Dense(6, activation = "softmax"))
model.summary()
opt = keras.optimizers.SGD(lr=0.001)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=["accuracy"])
earlystop = EarlyStopping(patience=7)
callbacks = [earlystop]
history = model.fit(x = X_train, y = y_train, epochs=40, batch_size = 8 , shuffle=True, validation_split=0.2, callbacks=callbacks)
y_pred = np.argmax(y_pred, axis = 1)
y_test = np.argmax(y_test, axis = 1)
print(classification_report(y_test, y_pred))
The error is self-explanatory: y_pred is not defined anywhere before you use it.
You have to predict on your data first, doing something like y_pred = model.predict(X_test); then you can take the argmax, etc.
[EDIT]: To sum up, replace your three last lines with the following (classification_report also needs to be imported, and y_test still needs its argmax):

from sklearn.metrics import classification_report

y_pred = model.predict(X_test)
y_pred = np.argmax(y_pred, axis=1)
y_test = np.argmax(y_test, axis=1)
print(classification_report(y_test, y_pred))
After spending days failing to use a neural network for Q-learning, I decided to go back to basics and do a simple function approximation to see whether everything was working correctly and how some parameters affect the learning process.
Here is the code that I came up with:
from keras.models import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plt
import random
import numpy
from sklearn.preprocessing import MinMaxScaler
regressor = Sequential()
regressor.add(Dense(units=20, activation='sigmoid', kernel_initializer='uniform', input_dim=1))
regressor.add(Dense(units=20, activation='sigmoid', kernel_initializer='uniform'))
regressor.add(Dense(units=20, activation='sigmoid', kernel_initializer='uniform'))
regressor.add(Dense(units=1))
regressor.compile(loss='mean_squared_error', optimizer='sgd')
#regressor = ExtraTreesRegressor()
N = 5000
X = numpy.empty((N,))
Y = numpy.empty((N,))
for i in range(N):
    X[i] = random.uniform(-10, 10)
X = numpy.sort(X).reshape(-1, 1)
for i in range(N):
    Y[i] = numpy.sin(X[i])
Y = Y.reshape(-1, 1)
X_scaler = MinMaxScaler()
Y_scaler = MinMaxScaler()
X = X_scaler.fit_transform(X)
Y = Y_scaler.fit_transform(Y)
regressor.fit(X, Y, epochs=2, verbose=1, batch_size=32)
#regressor.fit(X, Y.reshape(5000,))
x = numpy.mgrid[-10:10:100*1j]
x = x.reshape(-1, 1)
y = numpy.mgrid[-10:10:100*1j]
y = y.reshape(-1, 1)
x = X_scaler.fit_transform(x)
for i in range(len(x)):
    y[i] = regressor.predict(numpy.array([x[i]]))
plt.figure()
plt.plot(X_scaler.inverse_transform(x), Y_scaler.inverse_transform(y))
plt.plot(X_scaler.inverse_transform(X), Y_scaler.inverse_transform(Y))
The problem is that all my predictions are around 0 in value. As you can see, I used an ExtraTreesRegressor from sklearn (commented lines) to check that the protocol itself is correct. So what is wrong with my neural network? Why is it not working?
(The actual problem I'm trying to solve is to compute the Q function for the mountain car problem using a neural network. How is that different from this function approximator?)
With these changes:
- activations changed to relu
- kernel_initializer removed (i.e. the default 'glorot_uniform' is kept)
- Adam optimizer
- 100 epochs
i.e.:
regressor = Sequential()
regressor.add(Dense(units=20, activation='relu', input_dim=1))
regressor.add(Dense(units=20, activation='relu'))
regressor.add(Dense(units=20, activation='relu'))
regressor.add(Dense(units=1))
regressor.compile(loss='mean_squared_error', optimizer='adam')
regressor.fit(X, Y, epochs=100, verbose=1, batch_size=32)
and the rest of your code unchanged, here is the result (plot not reproduced here; the predictions track the sine curve closely):
Tinker, again and again...
A more concise version of your code that works:
def data_gen():
    while True:
        x = (np.random.random([1024]) - 0.5) * 10
        y = np.sin(x)
        yield (x, y)
regressor = Sequential()
regressor.add(Dense(units=20, activation='tanh', input_dim=1))
regressor.add(Dense(units=20, activation='tanh'))
regressor.add(Dense(units=20, activation='tanh'))
regressor.add(Dense(units=1, activation='linear'))
regressor.compile(loss='mse', optimizer='adam')
regressor.fit_generator(data_gen(), epochs=3, steps_per_epoch=128)
x = (np.random.random([1024])-0.5)*10
x = np.sort(x)
y = np.sin(x)
plt.plot(x, y)
plt.plot(x, regressor.predict(x))
plt.show()
Changes made: replacing the lower layers' activations with hyperbolic tangents, replacing the static dataset with a random generator, and replacing sgd with adam. That said, there are still problems with other parts of your code that I haven't been able to locate yet (most likely your scaler and random process).
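On the scaler point, one likely culprit (my reading, not verified against the original post) is that the plotting grid is passed through fit_transform, which refits the scaler on new data; anything after training should go through transform so it is scaled with the statistics learned from X. A minimal sketch, using the names from the question:

# Scale the evaluation grid with the already-fitted training scaler.
x_grid = numpy.mgrid[-10:10:100*1j].reshape(-1, 1)
x_scaled = X_scaler.transform(x_grid)          # not fit_transform
y_scaled = regressor.predict(x_scaled)
plt.plot(x_grid, Y_scaler.inverse_transform(y_scaled))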
I managed to get a good approximation by changing the architecture and the training as in the following code. It's a bit of overkill, but at least I know where the problem was coming from.
from keras.models import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plt
import random
import numpy
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import ExtraTreesRegressor
from keras import optimizers
regressor = Sequential()
regressor.add(Dense(units=500, activation='sigmoid', kernel_initializer='uniform', input_dim=1))
regressor.add(Dense(units=500, activation='sigmoid', kernel_initializer='uniform'))
regressor.add(Dense(units=1, activation='sigmoid'))
regressor.compile(loss='mean_squared_error', optimizer='adam')
#regressor = ExtraTreesRegressor()
N = 5000
X = numpy.empty((N,))
Y = numpy.empty((N,))
for i in range(N):
    X[i] = random.uniform(-10, 10)
X = numpy.sort(X).reshape(-1, 1)
for i in range(N):
    Y[i] = numpy.sin(X[i])
Y = Y.reshape(-1, 1)
X_scaler = MinMaxScaler()
Y_scaler = MinMaxScaler()
X = X_scaler.fit_transform(X)
Y = Y_scaler.fit_transform(Y)
regressor.fit(X, Y, epochs=50, verbose=1, batch_size=2)
#regressor.fit(X, Y.reshape(5000,))
x = numpy.mgrid[-10:10:100*1j]
x = x.reshape(-1, 1)
y = numpy.mgrid[-10:10:100*1j]
y = y.reshape(-1, 1)
x = X_scaler.fit_transform(x)
for i in range(len(x)):
    y[i] = regressor.predict(numpy.array([x[i]]))
plt.figure()
plt.plot(X_scaler.inverse_transform(x), Y_scaler.inverse_transform(y))
plt.plot(X_scaler.inverse_transform(X), Y_scaler.inverse_transform(Y))
However, I'm still baffled that I found papers saying they used only two hidden layers of five neurons to approximate the Q function of the mountain car problem, trained their network for only a few minutes, and got good results. I will try changing the batch size in my original problem to see what results I can get, but I'm not very optimistic.
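For reference, the tiny architecture those papers describe would look something like this in Keras (a sketch; the state/action encoding and all hyperparameters are my guesses, not taken from any specific paper):

# Two hidden layers of five neurons each. Assumes the mountain car state
# (position, velocity) as input and one Q-value per discrete action
# (push left, no push, push right) as output.
q_net = Sequential()
q_net.add(Dense(units=5, activation='tanh', input_dim=2))
q_net.add(Dense(units=5, activation='tanh'))
q_net.add(Dense(units=3))
q_net.compile(loss='mean_squared_error', optimizer='adam')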